diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index dcfbe1e..80a28d9 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -7,247 +7,148 @@ #include #include "Epub/FsHelpers.h" +#include "Epub/parsers/ContainerParser.h" +#include "Epub/parsers/ContentOpfParser.h" +#include "Epub/parsers/TocNcxParser.h" -bool Epub::findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile) { - // open up the meta data to find where the content.opf file lives - size_t s; - const auto metaInfo = reinterpret_cast(zip.readFileToMemory("META-INF/container.xml", &s, true)); - if (!metaInfo) { - Serial.printf("[%lu] [EBP] Could not find META-INF/container.xml\n", millis()); +bool Epub::findContentOpfFile(std::string* contentOpfFile) const { + const auto containerPath = "META-INF/container.xml"; + size_t containerSize; + + // Get file size without loading it all into heap + if (!getItemSize(containerPath, &containerSize)) { + Serial.printf("[%lu] [EBP] Could not find or size META-INF/container.xml\n", millis()); return false; } - // parse the meta data - tinyxml2::XMLDocument metaDataDoc; - const auto result = metaDataDoc.Parse(metaInfo); - free(metaInfo); + ContainerParser containerParser(containerSize); - if (result != tinyxml2::XML_SUCCESS) { - Serial.printf("[%lu] [EBP] Could not parse META-INF/container.xml. 
Error: %d\n", millis(), result); + if (!containerParser.setup()) { return false; } - const auto container = metaDataDoc.FirstChildElement("container"); - if (!container) { - Serial.printf("[%lu] [EBP] Could not find container element in META-INF/container.xml\n", millis()); + // Stream read (reusing your existing stream logic) + if (!readItemContentsToStream(containerPath, containerParser, 512)) { + Serial.printf("[%lu] [EBP] Could not read META-INF/container.xml\n", millis()); + containerParser.teardown(); return false; } - const auto rootfiles = container->FirstChildElement("rootfiles"); - if (!rootfiles) { - Serial.printf("[%lu] [EBP] Could not find rootfiles element in META-INF/container.xml\n", millis()); + // Extract the result + if (containerParser.fullPath.empty()) { + Serial.printf("[%lu] [EBP] Could not find valid rootfile in container.xml\n", millis()); + containerParser.teardown(); return false; } - // find the root file that has the media-type="application/oebps-package+xml" - auto rootfile = rootfiles->FirstChildElement("rootfile"); - while (rootfile) { - const char* mediaType = rootfile->Attribute("media-type"); - if (mediaType && strcmp(mediaType, "application/oebps-package+xml") == 0) { - const char* full_path = rootfile->Attribute("full-path"); - if (full_path) { - contentOpfFile = full_path; - return true; - } - } - rootfile = rootfile->NextSiblingElement("rootfile"); - } + *contentOpfFile = std::move(containerParser.fullPath); - Serial.printf("[%lu] [EBP] Could not get path to content.opf file\n", millis()); - return false; -} - -bool Epub::parseContentOpf(ZipFile& zip, std::string& content_opf_file) { - // read in the content.opf file and parse it - auto contents = reinterpret_cast(zip.readFileToMemory(content_opf_file.c_str(), nullptr, true)); - - // parse the contents - tinyxml2::XMLDocument doc; - auto result = doc.Parse(contents); - free(contents); - - if (result != tinyxml2::XML_SUCCESS) { - Serial.printf("[%lu] [EBP] Error parsing 
content.opf - %s\n", millis(), - tinyxml2::XMLDocument::ErrorIDToName(result)); - return false; - } - - auto package = doc.FirstChildElement("package"); - if (!package) package = doc.FirstChildElement("opf:package"); - - if (!package) { - Serial.printf("[%lu] [EBP] Could not find package element in content.opf\n", millis()); - return false; - } - - // get the metadata - title and cover image - auto metadata = package->FirstChildElement("metadata"); - if (!metadata) metadata = package->FirstChildElement("opf:metadata"); - if (!metadata) { - Serial.printf("[%lu] [EBP] Missing metadata\n", millis()); - return false; - } - - auto titleEl = metadata->FirstChildElement("dc:title"); - if (!titleEl) { - Serial.printf("[%lu] [EBP] Missing title\n", millis()); - return false; - } - this->title = titleEl->GetText(); - - auto cover = metadata->FirstChildElement("meta"); - if (!cover) cover = metadata->FirstChildElement("opf:meta"); - while (cover && cover->Attribute("name") && strcmp(cover->Attribute("name"), "cover") != 0) { - cover = cover->NextSiblingElement("meta"); - } - if (!cover) { - Serial.printf("[%lu] [EBP] Missing cover\n", millis()); - } - auto coverItem = cover ? 
cover->Attribute("content") : nullptr; - - // read the manifest and spine - // the manifest gives us the names of the files - // the spine gives us the order of the files - // we can then read the files in the order they are in the spine - auto manifest = package->FirstChildElement("manifest"); - if (!manifest) manifest = package->FirstChildElement("opf:manifest"); - if (!manifest) { - Serial.printf("[%lu] [EBP] Missing manifest\n", millis()); - return false; - } - - // create a mapping from id to file name - auto item = manifest->FirstChildElement("item"); - if (!item) item = manifest->FirstChildElement("opf:item"); - std::map items; - - while (item) { - std::string itemId = item->Attribute("id"); - std::string href = contentBasePath + item->Attribute("href"); - - // grab the cover image - if (coverItem && itemId == coverItem) { - coverImageItem = href; - } - - // grab the ncx file - if (itemId == "ncx" || itemId == "ncxtoc") { - tocNcxItem = href; - } - - items[itemId] = href; - auto nextItem = item->NextSiblingElement("item"); - if (!nextItem) nextItem = item->NextSiblingElement("opf:item"); - item = nextItem; - } - - // find the spine - auto spineEl = package->FirstChildElement("spine"); - if (!spineEl) spineEl = package->FirstChildElement("opf:spine"); - if (!spineEl) { - Serial.printf("[%lu] [EBP] Missing spine\n", millis()); - return false; - } - - // read the spine - auto itemref = spineEl->FirstChildElement("itemref"); - if (!itemref) itemref = spineEl->FirstChildElement("opf:itemref"); - while (itemref) { - auto id = itemref->Attribute("idref"); - if (items.find(id) != items.end()) { - spine.emplace_back(id, items[id]); - } - auto nextItemRef = itemref->NextSiblingElement("itemref"); - if (!nextItemRef) nextItemRef = itemref->NextSiblingElement("opf:itemref"); - itemref = nextItemRef; - } + containerParser.teardown(); return true; } -bool Epub::parseTocNcxFile(const ZipFile& zip) { +bool Epub::parseContentOpf(const std::string& contentOpfFilePath) { + 
size_t contentOpfSize; + if (!getItemSize(contentOpfFilePath, &contentOpfSize)) { + Serial.printf("[%lu] [EBP] Could not get size of content.opf\n", millis()); + return false; + } + + ContentOpfParser opfParser(getBasePath(), contentOpfSize); + + if (!opfParser.setup()) { + Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis()); + return false; + } + + if (!readItemContentsToStream(contentOpfFilePath, opfParser, 1024)) { + Serial.printf("[%lu] [EBP] Could not read content.opf\n", millis()); + opfParser.teardown(); + return false; + } + + // Grab data from opfParser into epub + title = opfParser.title; + + if (opfParser.items.count("ncx")) { + tocNcxItem = opfParser.items.at("ncx"); + } else if (opfParser.items.count("ncxtoc")) { + tocNcxItem = opfParser.items.at("ncxtoc"); + } + + for (auto& spineRef : opfParser.spineRefs) { + if (opfParser.items.count(spineRef)) { + spine.emplace_back(spineRef, opfParser.items.at(spineRef)); + } + } + + Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); + + opfParser.teardown(); + return true; +} + +bool Epub::parseTocNcxFile() { // the ncx file should have been specified in the content.opf file if (tocNcxItem.empty()) { Serial.printf("[%lu] [EBP] No ncx file specified\n", millis()); return false; } - const auto ncxData = reinterpret_cast(zip.readFileToMemory(tocNcxItem.c_str(), nullptr, true)); - if (!ncxData) { - Serial.printf("[%lu] [EBP] Could not find %s\n", millis(), tocNcxItem.c_str()); + size_t tocSize; + if (!getItemSize(tocNcxItem, &tocSize)) { + Serial.printf("[%lu] [EBP] Could not get size of toc ncx\n", millis()); return false; } - // Parse the Toc contents - tinyxml2::XMLDocument doc; - const auto result = doc.Parse(ncxData); - free(ncxData); + TocNcxParser ncxParser(contentBasePath, tocSize); - if (result != tinyxml2::XML_SUCCESS) { - Serial.printf("[%lu] [EBP] Error parsing toc %s\n", millis(), tinyxml2::XMLDocument::ErrorIDToName(result)); + if (!ncxParser.setup()) { + 
Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis()); return false; } - const auto ncx = doc.FirstChildElement("ncx"); - if (!ncx) { - Serial.printf("[%lu] [EBP] Could not find first child ncx in toc\n", millis()); + if (!readItemContentsToStream(tocNcxItem, ncxParser, 1024)) { + Serial.printf("[%lu] [EBP] Could not read toc ncx stream\n", millis()); + ncxParser.teardown(); return false; } - const auto navMap = ncx->FirstChildElement("navMap"); - if (!navMap) { - Serial.printf("[%lu] [EBP] Could not find navMap child in ncx\n", millis()); - return false; - } + this->toc = std::move(ncxParser.toc); - recursivelyParseNavMap(navMap->FirstChildElement("navPoint")); + Serial.printf("[%lu] [EBP] Parsed %u TOC items\n", millis(), static_cast<unsigned>(this->toc.size())); + + ncxParser.teardown(); return true; } -void Epub::recursivelyParseNavMap(tinyxml2::XMLElement* element) { - // Fills toc map - while (element) { - std::string navTitle = element->FirstChildElement("navLabel")->FirstChildElement("text")->FirstChild()->Value(); - const auto content = element->FirstChildElement("content"); - std::string href = contentBasePath + content->Attribute("src"); - // split the href on the # to get the href and the anchor - const size_t pos = href.find('#'); - std::string anchor; - - if (pos != std::string::npos) { - anchor = href.substr(pos + 1); - href = href.substr(0, pos); - } - - toc.emplace_back(navTitle, href, anchor, 0); - - tinyxml2::XMLElement* nestedNavPoint = element->FirstChildElement("navPoint"); - if (nestedNavPoint) { - recursivelyParseNavMap(nestedNavPoint); - } - element = element->NextSiblingElement("navPoint"); - } -} - // load in the meta data for the epub file bool Epub::load() { + Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); + - ZipFile zip("/sd" + filepath); - std::string contentOpfFile; - if (!findContentOpfFile(zip, contentOpfFile)) { - Serial.printf("[%lu] [EBP] Could not open ePub\n", millis()); + std::string contentOpfFilePath; 
+ if (!findContentOpfFile(&contentOpfFilePath)) { + Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis()); return false; } - contentBasePath = contentOpfFile.substr(0, contentOpfFile.find_last_of('/') + 1); + Serial.printf("[%lu] [EBP] Found content.opf at: %s\n", millis(), contentOpfFilePath.c_str()); - if (!parseContentOpf(zip, contentOpfFile)) { + contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1); + + if (!parseContentOpf(contentOpfFilePath)) { + Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis()); return false; } - if (!parseTocNcxFile(zip)) { + if (!parseTocNcxFile()) { + Serial.printf("[%lu] [EBP] Could not parse toc\n", millis()); return false; } + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); + return true; } @@ -344,6 +245,13 @@ bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, con return zip.readFileToStream(path.c_str(), out, chunkSize); } +bool Epub::getItemSize(const std::string& itemHref, size_t* size) const { + const ZipFile zip("/sd" + filepath); + const std::string path = normalisePath(itemHref); + + return zip.getInflatedFileSize(path.c_str(), size); +} + int Epub::getSpineItemsCount() const { return spine.size(); } std::string& Epub::getSpineItem(const int spineIndex) { diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 5cdfee4..765eacc 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -1,22 +1,13 @@ #pragma once #include -#include #include #include #include -class ZipFile; +#include "Epub/EpubTocEntry.h" -class EpubTocEntry { - public: - std::string title; - std::string href; - std::string anchor; - int level; - EpubTocEntry(std::string title, std::string href, std::string anchor, const int level) - : title(std::move(title)), href(std::move(href)), anchor(std::move(anchor)), level(level) {} -}; +class ZipFile; class Epub { // the title read from the EPUB meta data @@ -36,11 +27,9 @@ class Epub { // Uniq 
cache key based on filepath std::string cachePath; - // find the path for the content.opf file - static bool findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile); - bool parseContentOpf(ZipFile& zip, std::string& content_opf_file); - bool parseTocNcxFile(const ZipFile& zip); - void recursivelyParseNavMap(tinyxml2::XMLElement* element); + bool findContentOpfFile(std::string* contentOpfFile) const; + bool parseContentOpf(const std::string& contentOpfFilePath); + bool parseTocNcxFile(); public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -59,6 +48,7 @@ class Epub { uint8_t* readItemContentsToBytes(const std::string& itemHref, size_t* size = nullptr, bool trailingNullByte = false) const; bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const; + bool getItemSize(const std::string& itemHref, size_t* size) const; std::string& getSpineItem(int spineIndex); int getSpineItemsCount() const; EpubTocEntry& getTocItem(int tocTndex); diff --git a/lib/Epub/Epub/EpubTocEntry.h b/lib/Epub/Epub/EpubTocEntry.h new file mode 100644 index 0000000..715e4a4 --- /dev/null +++ b/lib/Epub/Epub/EpubTocEntry.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +class EpubTocEntry { + public: + std::string title; + std::string href; + std::string anchor; + int level; + EpubTocEntry(std::string title, std::string href, std::string anchor, const int level) + : title(std::move(title)), href(std::move(href)), anchor(std::move(anchor)), level(level) {} +}; diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index b5eadf4..c8118a2 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -5,9 +5,9 @@ #include -#include "EpubHtmlParserSlim.h" #include "FsHelpers.h" #include "Page.h" +#include "parsers/ChapterHtmlSlimParser.h" constexpr uint8_t SECTION_FILE_VERSION = 4; @@ -127,9 +127,9 @@ bool Section::persistPageDataToSD(const int fontId, const float 
lineCompression, const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath; - EpubHtmlParserSlim visitor(sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight, - marginBottom, marginLeft, - [this](std::unique_ptr page) { this->onPageComplete(std::move(page)); }); + ChapterHtmlSlimParser visitor(sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight, + marginBottom, marginLeft, + [this](std::unique_ptr page) { this->onPageComplete(std::move(page)); }); success = visitor.parseAndBuildPages(); SD.remove(tmpHtmlPath.c_str()); diff --git a/lib/Epub/Epub/EpubHtmlParserSlim.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp similarity index 85% rename from lib/Epub/Epub/EpubHtmlParserSlim.cpp rename to lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 9d7ef52..cafb1a5 100644 --- a/lib/Epub/Epub/EpubHtmlParserSlim.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -1,11 +1,11 @@ -#include "EpubHtmlParserSlim.h" +#include "ChapterHtmlSlimParser.h" #include #include #include -#include "Page.h" -#include "htmlEntities.h" +#include "../Page.h" +#include "../htmlEntities.h" const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"}; constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]); @@ -38,7 +38,7 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib } // start a new text block if needed -void EpubHtmlParserSlim::startNewTextBlock(const TextBlock::BLOCK_STYLE style) { +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::BLOCK_STYLE style) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { @@ -51,8 +51,8 @@ void EpubHtmlParserSlim::startNewTextBlock(const TextBlock::BLOCK_STYLE style) { currentTextBlock.reset(new ParsedText(style)); } -void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { - auto* self = 
static_cast(userData); +void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { + auto* self = static_cast(userData); (void)atts; // Middle of skip @@ -62,23 +62,7 @@ void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* na } if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) { - // const char* src = element.Attribute("src"); - // if (src) { - // // don't leave an empty text block in the list - // // const BLOCK_STYLE style = currentTextBlock->get_style(); - // if (currentTextBlock->isEmpty()) { - // delete currentTextBlock; - // currentTextBlock = nullptr; - // } - // // TODO: Fix this - // // blocks.push_back(new ImageBlock(m_base_path + src)); - // // start a new text block - with the same style as before - // // startNewTextBlock(style); - // } else { - // // ESP_LOGE(TAG, "Could not find src attribute"); - // } - - // start skip + // TODO: Start processing image tags self->skipUntilDepth = self->depth; self->depth += 1; return; @@ -109,8 +93,8 @@ void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* na self->depth += 1; } -void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s, const int len) { - auto* self = static_cast(userData); +void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) { + auto* self = static_cast(userData); // Middle of skip if (self->skipUntilDepth < self->depth) { @@ -149,8 +133,8 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s } } -void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name) { - auto* self = static_cast(userData); +void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { + auto* self = static_cast(userData); (void)name; if (self->partWordBufferIndex > 0) { @@ -196,7 +180,7 @@ void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name } } -bool 
EpubHtmlParserSlim::parseAndBuildPages() { +bool ChapterHtmlSlimParser::parseAndBuildPages() { startNewTextBlock(TextBlock::JUSTIFIED); const XML_Parser parser = XML_ParserCreate(nullptr); @@ -261,7 +245,7 @@ bool EpubHtmlParserSlim::parseAndBuildPages() { return true; } -void EpubHtmlParserSlim::makePages() { +void ChapterHtmlSlimParser::makePages() { if (!currentTextBlock) { Serial.printf("[%lu] [EHP] !! No text block to make pages for !!\n", millis()); return; diff --git a/lib/Epub/Epub/EpubHtmlParserSlim.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h similarity index 74% rename from lib/Epub/Epub/EpubHtmlParserSlim.h rename to lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 0169fd8..1212ec6 100644 --- a/lib/Epub/Epub/EpubHtmlParserSlim.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -6,15 +6,15 @@ #include #include -#include "ParsedText.h" -#include "blocks/TextBlock.h" +#include "../ParsedText.h" +#include "../blocks/TextBlock.h" class Page; class GfxRenderer; #define MAX_WORD_SIZE 200 -class EpubHtmlParserSlim { +class ChapterHtmlSlimParser { const char* filepath; GfxRenderer& renderer; std::function)> completePageFn; @@ -44,10 +44,10 @@ class EpubHtmlParserSlim { static void XMLCALL endElement(void* userData, const XML_Char* name); public: - explicit EpubHtmlParserSlim(const char* filepath, GfxRenderer& renderer, const int fontId, - const float lineCompression, const int marginTop, const int marginRight, - const int marginBottom, const int marginLeft, - const std::function)>& completePageFn) + explicit ChapterHtmlSlimParser(const char* filepath, GfxRenderer& renderer, const int fontId, + const float lineCompression, const int marginTop, const int marginRight, + const int marginBottom, const int marginLeft, + const std::function)>& completePageFn) : filepath(filepath), renderer(renderer), fontId(fontId), @@ -57,6 +57,6 @@ class EpubHtmlParserSlim { marginBottom(marginBottom), marginLeft(marginLeft), completePageFn(completePageFn) {} - 
~EpubHtmlParserSlim() = default; + ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages(); }; diff --git a/lib/Epub/Epub/parsers/ContainerParser.cpp b/lib/Epub/Epub/parsers/ContainerParser.cpp new file mode 100644 index 0000000..b7ff5d1 --- /dev/null +++ b/lib/Epub/Epub/parsers/ContainerParser.cpp @@ -0,0 +1,96 @@ +#include "ContainerParser.h" + +#include + +bool ContainerParser::setup() { + parser = XML_ParserCreate(nullptr); + if (!parser) { + Serial.printf("[%lu] [CTR] Couldn't allocate memory for parser\n", millis()); + return false; + } + + XML_SetUserData(parser, this); + XML_SetElementHandler(parser, startElement, endElement); + return true; +} + +bool ContainerParser::teardown() { + if (parser) { + XML_ParserFree(parser); + parser = nullptr; + } + return true; +} + +size_t ContainerParser::write(const uint8_t data) { return write(&data, 1); } + +size_t ContainerParser::write(const uint8_t* buffer, const size_t size) { + if (!parser) return 0; + + const uint8_t* currentBufferPos = buffer; + auto remainingInBuffer = size; + + while (remainingInBuffer > 0) { + void* const buf = XML_GetBuffer(parser, 1024); + if (!buf) { + Serial.printf("[%lu] [CTR] Couldn't allocate buffer\n", millis()); + return 0; + } + + const auto toRead = remainingInBuffer < 1024 ? 
remainingInBuffer : 1024; + memcpy(buf, currentBufferPos, toRead); + + if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) { + Serial.printf("[%lu] [CTR] Parse error: %s\n", millis(), XML_ErrorString(XML_GetErrorCode(parser))); + return 0; + } + + currentBufferPos += toRead; + remainingInBuffer -= toRead; + remainingSize -= toRead; + } + return size; +} + +void XMLCALL ContainerParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { + auto* self = static_cast<ContainerParser*>(userData); + + // Simple state tracking to ensure we are looking at the valid schema structure + if (self->state == START && strcmp(name, "container") == 0) { + self->state = IN_CONTAINER; + return; + } + + if (self->state == IN_CONTAINER && strcmp(name, "rootfiles") == 0) { + self->state = IN_ROOTFILES; + return; + } + + if (self->state == IN_ROOTFILES && strcmp(name, "rootfile") == 0) { + const char* mediaType = nullptr; + const char* path = nullptr; + + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "media-type") == 0) { + mediaType = atts[i + 1]; + } else if (strcmp(atts[i], "full-path") == 0) { + path = atts[i + 1]; + } + } + + // Keep only the first OEBPS package rootfile; later rootfile entries must not override it + if (self->fullPath.empty() && mediaType && path && strcmp(mediaType, "application/oebps-package+xml") == 0) { + self->fullPath = path; + } + } +} + +void XMLCALL ContainerParser::endElement(void* userData, const XML_Char* name) { + auto* self = static_cast<ContainerParser*>(userData); + + if (self->state == IN_ROOTFILES && strcmp(name, "rootfiles") == 0) { + self->state = IN_CONTAINER; + } else if (self->state == IN_CONTAINER && strcmp(name, "container") == 0) { + self->state = START; + } +} diff --git a/lib/Epub/Epub/parsers/ContainerParser.h b/lib/Epub/Epub/parsers/ContainerParser.h new file mode 100644 index 0000000..07e28ab --- /dev/null +++ b/lib/Epub/Epub/parsers/ContainerParser.h @@ -0,0 +1,32 @@ +#pragma once +#include + +#include + +#include "expat.h" + +class ContainerParser final : 
public Print { + enum ParserState { + START, + IN_CONTAINER, + IN_ROOTFILES, + }; + + size_t remainingSize; + XML_Parser parser = nullptr; + ParserState state = START; + + static void startElement(void* userData, const XML_Char* name, const XML_Char** atts); + static void endElement(void* userData, const XML_Char* name); + + public: + std::string fullPath; + + explicit ContainerParser(const size_t xmlSize) : remainingSize(xmlSize) {} + + bool setup(); + bool teardown(); + + size_t write(uint8_t) override; + size_t write(const uint8_t* buffer, size_t size) override; +}; diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp new file mode 100644 index 0000000..1dcdb04 --- /dev/null +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -0,0 +1,161 @@ +#include "ContentOpfParser.h" + +#include +#include + +bool ContentOpfParser::setup() { + parser = XML_ParserCreate(nullptr); + if (!parser) { + Serial.printf("[%lu] [COF] Couldn't allocate memory for parser\n", millis()); + return false; + } + + XML_SetUserData(parser, this); + XML_SetElementHandler(parser, startElement, endElement); + XML_SetCharacterDataHandler(parser, characterData); + return true; +} + +bool ContentOpfParser::teardown() { + if (parser) { + XML_ParserFree(parser); + parser = nullptr; + } + return true; +} + +size_t ContentOpfParser::write(const uint8_t data) { return write(&data, 1); } + +size_t ContentOpfParser::write(const uint8_t* buffer, const size_t size) { + if (!parser) return 0; + + const uint8_t* currentBufferPos = buffer; + auto remainingInBuffer = size; + + while (remainingInBuffer > 0) { + void* const buf = XML_GetBuffer(parser, 1024); + + if (!buf) { + Serial.printf("[%lu] [COF] Couldn't allocate memory for buffer\n", millis()); + XML_ParserFree(parser); + parser = nullptr; + return 0; + } + + const auto toRead = remainingInBuffer < 1024 ? 
remainingInBuffer : 1024; + memcpy(buf, currentBufferPos, toRead); + + if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) { + Serial.printf("[%lu] [COF] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser), + XML_ErrorString(XML_GetErrorCode(parser))); + XML_ParserFree(parser); + parser = nullptr; + return 0; + } + + currentBufferPos += toRead; + remainingInBuffer -= toRead; + remainingSize -= toRead; + } + + return size; +} + +void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { + auto* self = static_cast<ContentOpfParser*>(userData); + // atts is read further down by the manifest item and spine itemref branches + + if (self->state == START && (strcmp(name, "package") == 0 || strcmp(name, "opf:package") == 0)) { + self->state = IN_PACKAGE; + return; + } + + if (self->state == IN_PACKAGE && (strcmp(name, "metadata") == 0 || strcmp(name, "opf:metadata") == 0)) { + self->state = IN_METADATA; + return; + } + + if (self->state == IN_METADATA && strcmp(name, "dc:title") == 0) { + self->state = IN_BOOK_TITLE; + return; + } + + if (self->state == IN_PACKAGE && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) { + self->state = IN_MANIFEST; + return; + } + + if (self->state == IN_PACKAGE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) { + self->state = IN_SPINE; + return; + } + + // TODO: Support book cover + // if (self->state == IN_METADATA && (strcmp(name, "meta") == 0 || strcmp(name, "opf:meta") == 0)) { + // } + + if (self->state == IN_MANIFEST && (strcmp(name, "item") == 0 || strcmp(name, "opf:item") == 0)) { + std::string itemId; + std::string href; + + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "id") == 0) { + itemId = atts[i + 1]; + } else if (strcmp(atts[i], "href") == 0) { + href = self->baseContentPath + atts[i + 1]; + } + } + + if (!itemId.empty() && !href.empty()) self->items[itemId] = href; + return; + } + + if (self->state == IN_SPINE && (strcmp(name, "itemref") == 0 || strcmp(name, "opf:itemref") == 0)) { 
+ for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "idref") == 0) { + self->spineRefs.emplace_back(atts[i + 1]); + break; + } + } + return; + } +} + +void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s, const int len) { + auto* self = static_cast<ContentOpfParser*>(userData); + + if (self->state == IN_BOOK_TITLE) { + self->title.append(s, len); + return; + } +} + +void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name) { + auto* self = static_cast<ContentOpfParser*>(userData); + // Step the state machine back one level when the closing tag matches the current state + + if (self->state == IN_SPINE && (strcmp(name, "spine") == 0 || strcmp(name, "opf:spine") == 0)) { + self->state = IN_PACKAGE; + return; + } + + if (self->state == IN_MANIFEST && (strcmp(name, "manifest") == 0 || strcmp(name, "opf:manifest") == 0)) { + self->state = IN_PACKAGE; + return; + } + + if (self->state == IN_BOOK_TITLE && strcmp(name, "dc:title") == 0) { + self->state = IN_METADATA; + return; + } + + if (self->state == IN_METADATA && (strcmp(name, "metadata") == 0 || strcmp(name, "opf:metadata") == 0)) { + self->state = IN_PACKAGE; + return; + } + + if (self->state == IN_PACKAGE && (strcmp(name, "package") == 0 || strcmp(name, "opf:package") == 0)) { + self->state = START; + return; + } +} diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h new file mode 100644 index 0000000..5da16bd --- /dev/null +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -0,0 +1,42 @@ +#pragma once +#include + +#include + +#include "Epub.h" +#include "expat.h" + +class ContentOpfParser final : public Print { + enum ParserState { + START, + IN_PACKAGE, + IN_METADATA, + IN_BOOK_TITLE, + IN_MANIFEST, + IN_SPINE, + }; + + const std::string baseContentPath; // owned copy, so a temporary passed to the constructor cannot dangle + size_t remainingSize; + XML_Parser parser = nullptr; + ParserState state = START; + + static void startElement(void* userData, const XML_Char* name, const XML_Char** atts); + static void characterData(void* userData, const XML_Char* s, int len); + static void endElement(void* 
userData, const XML_Char* name); + + public: + std::string title; + std::string tocNcxPath; + std::map items; + std::vector spineRefs; + + explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize) + : baseContentPath(baseContentPath), remainingSize(xmlSize) {} + + bool setup(); + bool teardown(); + + size_t write(uint8_t) override; + size_t write(const uint8_t* buffer, size_t size) override; +}; diff --git a/lib/Epub/Epub/parsers/TocNcxParser.cpp b/lib/Epub/Epub/parsers/TocNcxParser.cpp new file mode 100644 index 0000000..f02d7c4 --- /dev/null +++ b/lib/Epub/Epub/parsers/TocNcxParser.cpp @@ -0,0 +1,165 @@ +#include "TocNcxParser.h" + +#include + +bool TocNcxParser::setup() { + parser = XML_ParserCreate(nullptr); + if (!parser) { + Serial.printf("[%lu] [TOC] Couldn't allocate memory for parser\n", millis()); + return false; + } + + XML_SetUserData(parser, this); + XML_SetElementHandler(parser, startElement, endElement); + XML_SetCharacterDataHandler(parser, characterData); + return true; +} + +bool TocNcxParser::teardown() { + if (parser) { + XML_ParserFree(parser); + parser = nullptr; + } + return true; +} + +size_t TocNcxParser::write(const uint8_t data) { return write(&data, 1); } + +size_t TocNcxParser::write(const uint8_t* buffer, const size_t size) { + if (!parser) return 0; + + const uint8_t* currentBufferPos = buffer; + auto remainingInBuffer = size; + + while (remainingInBuffer > 0) { + void* const buf = XML_GetBuffer(parser, 1024); + if (!buf) { + Serial.printf("[%lu] [TOC] Couldn't allocate memory for buffer\n", millis()); + return 0; + } + + const auto toRead = remainingInBuffer < 1024 ? 
remainingInBuffer : 1024; + memcpy(buf, currentBufferPos, toRead); + + if (XML_ParseBuffer(parser, static_cast(toRead), remainingSize == toRead) == XML_STATUS_ERROR) { + Serial.printf("[%lu] [TOC] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser), + XML_ErrorString(XML_GetErrorCode(parser))); + return 0; + } + + currentBufferPos += toRead; + remainingInBuffer -= toRead; + remainingSize -= toRead; + } + return size; +} + +void XMLCALL TocNcxParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { + // NOTE: We rely on navPoint label and content coming before any nested navPoints, this will be fine: + // + // Chapter 1 + // + // ...nested... + // + // + // This will NOT: + // + // ...nested... + // Chapter 1 + // + // + + auto* self = static_cast(userData); + + if (self->state == START && strcmp(name, "ncx") == 0) { + self->state = IN_NCX; + return; + } + + if (self->state == IN_NCX && strcmp(name, "navMap") == 0) { + self->state = IN_NAV_MAP; + return; + } + + // Handles both top-level and nested navPoints + if ((self->state == IN_NAV_MAP || self->state == IN_NAV_POINT) && strcmp(name, "navPoint") == 0) { + self->state = IN_NAV_POINT; + self->currentDepth++; + + self->currentLabel.clear(); + self->currentSrc.clear(); + return; + } + + if (self->state == IN_NAV_POINT && strcmp(name, "navLabel") == 0) { + self->state = IN_NAV_LABEL; + return; + } + + if (self->state == IN_NAV_LABEL && strcmp(name, "text") == 0) { + self->state = IN_NAV_LABEL_TEXT; + return; + } + + if (self->state == IN_NAV_POINT && strcmp(name, "content") == 0) { + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "src") == 0) { + self->currentSrc = atts[i + 1]; + break; + } + } + return; + } +} + +void XMLCALL TocNcxParser::characterData(void* userData, const XML_Char* s, const int len) { + auto* self = static_cast(userData); + if (self->state == IN_NAV_LABEL_TEXT) { + self->currentLabel.append(s, len); + } +} + +void XMLCALL 
TocNcxParser::endElement(void* userData, const XML_Char* name) { + auto* self = static_cast(userData); + + if (self->state == IN_NAV_LABEL_TEXT && strcmp(name, "text") == 0) { + self->state = IN_NAV_LABEL; + return; + } + + if (self->state == IN_NAV_LABEL && strcmp(name, "navLabel") == 0) { + self->state = IN_NAV_POINT; + return; + } + + if (self->state == IN_NAV_POINT && strcmp(name, "navPoint") == 0) { + self->currentDepth--; + if (self->currentDepth == 0) { + self->state = IN_NAV_MAP; + } + return; + } + + if (self->state == IN_NAV_POINT && strcmp(name, "content") == 0) { + // At this point (end of content tag), we likely have both Label (from previous tags) and Src. + // This is the safest place to push the data, assuming always comes before . + // NCX spec says navLabel comes before content. + if (!self->currentLabel.empty() && !self->currentSrc.empty()) { + std::string href = self->baseContentPath + self->currentSrc; + std::string anchor; + + const size_t pos = href.find('#'); + if (pos != std::string::npos) { + anchor = href.substr(pos + 1); + href = href.substr(0, pos); + } + + // Push to vector + self->toc.emplace_back(self->currentLabel, href, anchor, self->currentDepth); + + // Clear them so we don't re-add them if there are weird XML structures + self->currentLabel.clear(); + self->currentSrc.clear(); + } + } +} diff --git a/lib/Epub/Epub/parsers/TocNcxParser.h b/lib/Epub/Epub/parsers/TocNcxParser.h new file mode 100644 index 0000000..6217f3f --- /dev/null +++ b/lib/Epub/Epub/parsers/TocNcxParser.h @@ -0,0 +1,37 @@ +#pragma once +#include + +#include +#include + +#include "Epub/EpubTocEntry.h" +#include "expat.h" + +class TocNcxParser final : public Print { + enum ParserState { START, IN_NCX, IN_NAV_MAP, IN_NAV_POINT, IN_NAV_LABEL, IN_NAV_LABEL_TEXT, IN_CONTENT }; + + const std::string& baseContentPath; + size_t remainingSize; + XML_Parser parser = nullptr; + ParserState state = START; + + std::string currentLabel; + std::string currentSrc; + size_t 
currentDepth = 0; + + static void startElement(void* userData, const XML_Char* name, const XML_Char** atts); + static void characterData(void* userData, const XML_Char* s, int len); + static void endElement(void* userData, const XML_Char* name); + + public: + std::vector toc; + + explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize) + : baseContentPath(baseContentPath), remainingSize(xmlSize) {} + + bool setup(); + bool teardown(); + + size_t write(uint8_t) override; + size_t write(const uint8_t* buffer, size_t size) override; +}; diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index c6a7cc3..dd33264 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -162,9 +162,7 @@ int GfxRenderer::getLineHeight(const int fontId) const { return fontMap.at(fontId).getData(REGULAR)->advanceY; } -uint8_t *GfxRenderer::getFrameBuffer() const { - return einkDisplay.getFrameBuffer(); -} +uint8_t* GfxRenderer::getFrameBuffer() const { return einkDisplay.getFrameBuffer(); } void GfxRenderer::swapBuffers() const { einkDisplay.swapBuffers(); } diff --git a/lib/ZipFile/ZipFile.cpp b/lib/ZipFile/ZipFile.cpp index 29e2469..30b44f8 100644 --- a/lib/ZipFile/ZipFile.cpp +++ b/lib/ZipFile/ZipFile.cpp @@ -40,7 +40,7 @@ bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileS // find the file mz_uint32 fileIndex = 0; if (!mz_zip_reader_locate_file_v2(&zipArchive, filename, nullptr, 0, &fileIndex)) { - Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis, filename); + Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis(), filename); mz_zip_reader_end(&zipArchive); return false; } @@ -82,6 +82,16 @@ long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const { return fileOffset + localHeaderSize + filenameLength + extraOffset; } +bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) const { + mz_zip_archive_file_stat fileStat; + if 
(!loadFileStat(filename, &fileStat)) { + return false; + } + + *size = static_cast(fileStat.m_uncomp_size); + return true; +} + uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) const { mz_zip_archive_file_stat fileStat; if (!loadFileStat(filename, &fileStat)) { @@ -268,7 +278,14 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch // Write output chunk if (outBytes > 0) { processedOutputBytes += outBytes; - out.write(outputBuffer + outputCursor, outBytes); + if (out.write(outputBuffer + outputCursor, outBytes) != outBytes) { + Serial.printf("[%lu] [ZIP] Failed to write all output bytes to stream\n", millis()); + fclose(file); + free(outputBuffer); + free(fileReadBuffer); + free(inflator); + return false; + } // Update output position in buffer (with wraparound) outputCursor = (outputCursor + outBytes) & (TINFL_LZ_DICT_SIZE - 1); } diff --git a/lib/ZipFile/ZipFile.h b/lib/ZipFile/ZipFile.h index 36ac7c4..e452ec5 100644 --- a/lib/ZipFile/ZipFile.h +++ b/lib/ZipFile/ZipFile.h @@ -14,6 +14,7 @@ class ZipFile { public: explicit ZipFile(std::string filePath) : filePath(std::move(filePath)) {} ~ZipFile() = default; + bool getInflatedFileSize(const char* filename, size_t* size) const; uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false) const; bool readFileToStream(const char* filename, Print& out, size_t chunkSize) const; }; diff --git a/platformio.ini b/platformio.ini index a74fc4e..bb2a52c 100644 --- a/platformio.ini +++ b/platformio.ini @@ -29,7 +29,6 @@ board_build.partitions = partitions.csv ; Libraries lib_deps = - https://github.com/leethomason/tinyxml2.git#11.0.0 BatteryMonitor=symlink://open-x4-sdk/libs/hardware/BatteryMonitor InputManager=symlink://open-x4-sdk/libs/hardware/InputManager EInkDisplay=symlink://open-x4-sdk/libs/display/EInkDisplay diff --git a/src/screens/EpubReaderScreen.cpp b/src/screens/EpubReaderScreen.cpp index 
373628a..c6dd552 100644 --- a/src/screens/EpubReaderScreen.cpp +++ b/src/screens/EpubReaderScreen.cpp @@ -163,7 +163,7 @@ void EpubReaderScreen::renderScreen() { const int w = textWidth + margin * 2; const int h = renderer.getLineHeight(READER_FONT_ID) + margin * 2; renderer.grayscaleRevert(); - uint8_t *fb1 = renderer.getFrameBuffer(); + uint8_t* fb1 = renderer.getFrameBuffer(); renderer.swapBuffers(); memcpy(fb1, renderer.getFrameBuffer(), EInkDisplay::BUFFER_SIZE); renderer.fillRect(x, y, w, h, 0);