#include "ContentOpfParser.h" #include #include #include #include "../BookMetadataCache.h" namespace { constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml"; constexpr char itemCacheFile[] = "/.items.bin"; } // namespace bool ContentOpfParser::setup() { parser = XML_ParserCreate(nullptr); if (!parser) { Serial.printf("[%lu] [COF] Couldn't allocate memory for parser\n", millis()); return false; } XML_SetUserData(parser, this); XML_SetElementHandler(parser, startElement, endElement); XML_SetCharacterDataHandler(parser, characterData); return true; } ContentOpfParser::~ContentOpfParser() { if (parser) { XML_StopParser(parser, XML_FALSE); // Stop any pending processing XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks XML_SetCharacterDataHandler(parser, nullptr); XML_ParserFree(parser); parser = nullptr; } if (tempItemStore) { tempItemStore.close(); } if (SdMan.exists((cachePath + itemCacheFile).c_str())) { SdMan.remove((cachePath + itemCacheFile).c_str()); } } size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); } size_t ContentOpfParser::write(const uint8_t* buffer, const size_t size) { if (!parser) return 0; const uint8_t* currentBufferPos = buffer; auto remainingInBuffer = size; while (remainingInBuffer > 0) { void* const buf = XML_GetBuffer(parser, 1024); if (!buf) { Serial.printf("[%lu] [COF] Couldn't allocate memory for buffer\n", millis()); XML_StopParser(parser, XML_FALSE); // Stop any pending processing XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks XML_SetCharacterDataHandler(parser, nullptr); XML_ParserFree(parser); parser = nullptr; return 0; } const auto toRead = remainingInBuffer < 1024 ? remainingInBuffer : 1024; memcpy(buf, currentBufferPos, toRead); if (XML_ParseBuffer(parser, static_cast(toRead), remainingSize == toRead) == XML_STATUS_ERROR) { Serial.printf("[%lu] [COF] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser), XML_ErrorString(XML_GetErrorCode(parser))); XML_StopParser(parser, XML_FALSE); // Stop any pending processing XML_SetElementHandler(parser, nullptr, nullptr); // Clear callbacks XML_SetCharacterDataHandler(parser, nullptr); XML_ParserFree(parser); parser = nullptr; return 0; } currentBufferPos += toRead; remainingInBuffer -= toRead; remainingSize -= toRead; } return size; } void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); (void)atts; if (self->state == START && (strcmp(name, "package") == 0 || strcmp(name, "opf:package") == 0)) { self->state = IN_PACKAGE; return; } if (self->state == IN_PACKAGE && (strcmp(name, "metadata") == 0 || strcmp(name, "opf:metadata") == 0)) { self->state = IN_METADATA; return; } if (self->state == IN_METADATA && strcmp(name, "dc:title") == 0) { self->state = IN_BOOK_TITLE; return; } if (self->state == IN_METADATA && strcmp(name, "dc:creator") == 0) { self->state = IN_BOOK_AUTHOR; return; } if (self->state == IN_METADATA && strcmp(name, "dc:language") == 0) { self->state = IN_BOOK_LANGUAGE; return; } if (self->state == IN_PACKAGE && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) { self->state = IN_MANIFEST; if (!SdMan.openFileForWrite("COF", self->cachePath + itemCacheFile, self->tempItemStore)) { Serial.printf( "[%lu] [COF] Couldn't open temp items file for writing. This is probably going to be a fatal error.\n", millis()); } return; } if (self->state == IN_PACKAGE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) { self->state = IN_SPINE; if (!SdMan.openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) { Serial.printf( "[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n", millis()); } return; } if (self->state == IN_PACKAGE && (strcmp(name, "guide") == 0 || strcmp(name, "opf:guide") == 0)) { self->state = IN_GUIDE; // TODO Remove print Serial.printf("[%lu] [COF] Entering guide state.\n", millis()); if (!SdMan.openFileForRead("COF", self->cachePath + itemCacheFile, self->tempItemStore)) { Serial.printf( "[%lu] [COF] Couldn't open temp items file for reading. This is probably going to be a fatal error.\n", millis()); } return; } if (self->state == IN_METADATA && (strcmp(name, "meta") == 0 || strcmp(name, "opf:meta") == 0)) { bool isCover = false; std::string coverItemId; for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "name") == 0 && strcmp(atts[i + 1], "cover") == 0) { isCover = true; } else if (strcmp(atts[i], "content") == 0) { coverItemId = atts[i + 1]; } } if (isCover) { self->coverItemId = coverItemId; } return; } if (self->state == IN_MANIFEST && (strcmp(name, "item") == 0 || strcmp(name, "opf:item") == 0)) { std::string itemId; std::string href; std::string mediaType; std::string properties; for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "id") == 0) { itemId = atts[i + 1]; } else if (strcmp(atts[i], "href") == 0) { href = FsHelpers::normalisePath(self->baseContentPath + atts[i + 1]); } else if (strcmp(atts[i], "media-type") == 0) { mediaType = atts[i + 1]; } else if (strcmp(atts[i], "properties") == 0) { properties = atts[i + 1]; } } // Write items down to SD card serialization::writeString(self->tempItemStore, itemId); serialization::writeString(self->tempItemStore, href); if (itemId == self->coverItemId) { self->coverItemHref = href; } if (mediaType == MEDIA_TYPE_NCX) { if (self->tocNcxPath.empty()) { self->tocNcxPath = href; } else { Serial.printf("[%lu] [COF] Warning: Multiple NCX files found in manifest. Ignoring duplicate: %s\n", millis(), href.c_str()); } } // EPUB 3: Check for nav document (properties contains "nav") if (!properties.empty() && self->tocNavPath.empty()) { // Properties is space-separated, check if "nav" is present as a word if (properties == "nav" || properties.find("nav ") == 0 || properties.find(" nav") != std::string::npos) { self->tocNavPath = href; Serial.printf("[%lu] [COF] Found EPUB 3 nav document: %s\n", millis(), href.c_str()); } } return; } // NOTE: This relies on spine appearing after item manifest (which is pretty safe as it's part of the EPUB spec) // Only run the spine parsing if there's a cache to add it to if (self->cache) { if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) { for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "idref") == 0) { const std::string idref = atts[i + 1]; // Resolve the idref to href using items map // TODO: This lookup is slow as need to scan through all items each time. // It can take up to 200ms per item when getting to 1500 items. self->tempItemStore.seek(0); std::string itemId; std::string href; while (self->tempItemStore.available()) { serialization::readString(self->tempItemStore, itemId); serialization::readString(self->tempItemStore, href); if (itemId == idref) { self->cache->createSpineEntry(href); break; } } } } return; } } // parse the guide if (self->state == IN_GUIDE && (strcmp(name, "reference") == 0 || strcmp(name, "opf:reference") == 0)) { std::string type; std::string textHref; for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "type") == 0) { type = atts[i + 1]; if (type == "text" || type == "start") { continue; } else { Serial.printf("[%lu] [COF] Skipping non-text reference in guide: %s\n", millis(), type.c_str()); break; } } else if (strcmp(atts[i], "href") == 0) { textHref = FsHelpers::normalisePath(self->baseContentPath + atts[i + 1]); } } if ((type == "text" || (type == "start" && !self->textReferenceHref.empty())) && (textHref.length() > 0)) { Serial.printf("[%lu] [COF] Found %s reference in guide: %s.\n", millis(), type.c_str(), textHref.c_str()); self->textReferenceHref = textHref; } return; } } void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s, const int len) { auto* self = static_cast(userData); if (self->state == IN_BOOK_TITLE) { self->title.append(s, len); return; } if (self->state == IN_BOOK_AUTHOR) { self->author.append(s, len); return; } if (self->state == IN_BOOK_LANGUAGE) { self->language.append(s, len); return; } } void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name) { auto* self = static_cast(userData); (void)name; if (self->state == IN_SPINE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) { self->state = IN_PACKAGE; self->tempItemStore.close(); return; } if (self->state == IN_GUIDE && (strcmp(name, "guide") == 0 || strcmp(name, "opf:guide") == 0)) { self->state = IN_PACKAGE; self->tempItemStore.close(); return; } if (self->state == IN_MANIFEST && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) { self->state = IN_PACKAGE; self->tempItemStore.close(); return; } if (self->state == IN_BOOK_TITLE && strcmp(name, "dc:title") == 0) { self->state = IN_METADATA; return; } if (self->state == IN_BOOK_AUTHOR && strcmp(name, "dc:creator") == 0) { self->state = IN_METADATA; return; } if (self->state == IN_BOOK_LANGUAGE && strcmp(name, "dc:language") == 0) { self->state = IN_METADATA; return; } if (self->state == IN_METADATA && (strcmp(name, "metadata") == 0 || strcmp(name, "opf:metadata") == 0)) { self->state = IN_PACKAGE; return; } if (self->state == IN_PACKAGE && (strcmp(name, "package") == 0 || strcmp(name, "opf:package") == 0)) { self->state = START; return; } }