diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index d1855cc..3b75453 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -128,6 +128,10 @@ bool Epub::load() { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); ZipFile zip("/sd" + filepath); + if (!footnotePages) { + footnotePages = new std::unordered_set(); + } + std::string contentOpfFilePath; if (!findContentOpfFile(&contentOpfFilePath)) { Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis()); @@ -265,17 +269,32 @@ bool Epub::getItemSize(const std::string& itemHref, size_t* size) const { return zip.getInflatedFileSize(path.c_str(), size); } -int Epub::getSpineItemsCount() const { return spine.size(); } +int Epub::getSpineItemsCount() const { + int virtualCount = virtualSpineItems ? virtualSpineItems->size() : 0; + return spine.size() + virtualCount; +} size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { return cumulativeSpineItemSize.at(spineIndex); } -std::string& Epub::getSpineItem(const int spineIndex) { - if (spineIndex < 0 || spineIndex >= spine.size()) { - Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex); - return spine.at(0).second; +std::string Epub::getSpineItem(const int spineIndex) const { + // Normal spine item + if (spineIndex >= 0 && spineIndex < static_cast(spine.size())) { + return contentBasePath + spine.at(spineIndex).second; } - return spine.at(spineIndex).second; + // Virtual spine item + if (virtualSpineItems) { + int virtualIndex = spineIndex - spine.size(); + if (virtualIndex >= 0 && virtualIndex < static_cast(virtualSpineItems->size())) { + return (*virtualSpineItems)[virtualIndex]; + } + } + + Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex); + + // Return empty string instead of reference to avoid issues + static std::string emptyString = ""; + return emptyString; } EpubTocEntry& Epub::getTocItem(const int tocTndex) { @@ -305,6 +324,11 @@ int Epub::getSpineIndexForTocIndex(const int tocIndex) const { } int Epub::getTocIndexForSpineIndex(const int spineIndex) const { + // Skip virtual spine items + if (isVirtualSpineItem(spineIndex)) { + return -1; + } + // the toc entry should have an href that matches the spine item // so we can find the toc index by looking for the href for (int i = 0; i < toc.size(); i++) { @@ -317,6 +341,80 @@ int Epub::getTocIndexForSpineIndex(const int spineIndex) const { return -1; } +void Epub::markAsFootnotePage(const std::string& href) { + // Lazy initialization + if (!footnotePages) { + footnotePages = new std::unordered_set(); + } + + // Extract filename from href (remove #anchor if present) + size_t hashPos = href.find('#'); + std::string filename = (hashPos != std::string::npos) ? href.substr(0, hashPos) : href; + + // Extract just the filename without path + size_t lastSlash = filename.find_last_of('/'); + if (lastSlash != std::string::npos) { + filename = filename.substr(lastSlash + 1); + } + + footnotePages->insert(filename); + Serial.printf("[%lu] [EPUB] Marked as footnote page: %s\n", millis(), filename.c_str()); +} + +bool Epub::isFootnotePage(const std::string& filename) const { + if (!footnotePages) return false; + return footnotePages->find(filename) != footnotePages->end(); +} + +bool Epub::shouldHideFromToc(int spineIndex) const { + // Always hide virtual spine items + if (isVirtualSpineItem(spineIndex)) { + return true; + } + + if (spineIndex < 0 || spineIndex >= spine.size()) { + return true; + } + + const std::string& spineItem = spine[spineIndex].second; + + // Extract filename from spine item + size_t lastSlash = spineItem.find_last_of('/'); + std::string filename = (lastSlash != std::string::npos) ? spineItem.substr(lastSlash + 1) : spineItem; + + return isFootnotePage(filename); +} + +// Virtual spine items +int Epub::addVirtualSpineItem(const std::string& path) { + // Lazy initialization + if (!virtualSpineItems) { + virtualSpineItems = new std::vector(); + } + + virtualSpineItems->push_back(path); + int newIndex = spine.size() + virtualSpineItems->size() - 1; + Serial.printf("[%lu] [EPUB] Added virtual spine item: %s (index %d)\n", millis(), path.c_str(), newIndex); + return newIndex; +} + +bool Epub::isVirtualSpineItem(int spineIndex) const { return spineIndex >= static_cast(spine.size()); } + +int Epub::findVirtualSpineIndex(const std::string& filename) const { + if (!virtualSpineItems) return -1; + + for (size_t i = 0; i < virtualSpineItems->size(); i++) { + std::string virtualPath = (*virtualSpineItems)[i]; + size_t lastSlash = virtualPath.find_last_of('/'); + std::string virtualFilename = (lastSlash != std::string::npos) ? virtualPath.substr(lastSlash + 1) : virtualPath; + + if (virtualFilename == filename) { + return spine.size() + i; + } + } + return -1; +} + size_t Epub::getBookSize() const { return getCumulativeSpineItemSize(getSpineItemsCount() - 1); } // Calculate progress in book @@ -326,4 +424,4 @@ uint8_t Epub::calculateProgress(const int currentSpineIndex, const float current size_t bookSize = getBookSize(); size_t sectionProgSize = currentSpineRead * curChapterSize; return round(static_cast(prevChapterSize + sectionProgSize) / bookSize * 100.0); -} +} \ No newline at end of file diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 1f2dfa9..3f67b35 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -3,6 +3,7 @@ #include #include +#include #include #include "Epub/EpubTocEntry.h" @@ -10,35 +11,37 @@ class ZipFile; class Epub { - // the title read from the EPUB meta data std::string title; - // the cover image std::string coverImageItem; - // the ncx file std::string tocNcxItem; - // where is the EPUBfile? std::string filepath; - // the spine of the EPUB file std::vector> spine; // the file size of the spine items (proxy to book progress) std::vector cumulativeSpineItemSize; // the toc of the EPUB file std::vector toc; - // the base path for items in the EPUB file std::string contentBasePath; - // Uniq cache key based on filepath std::string cachePath; + // Use pointers, allocate only if needed + std::unordered_set* footnotePages; + std::vector* virtualSpineItems; + bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(const std::string& contentOpfFilePath); bool parseTocNcxFile(); public: - explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { - // create a cache key based on the filepath + explicit Epub(std::string filepath, const std::string& cacheDir) + : filepath(std::move(filepath)), footnotePages(nullptr), virtualSpineItems(nullptr) { cachePath = cacheDir + "/epub_" + std::to_string(std::hash{}(this->filepath)); } - ~Epub() = default; + + ~Epub() { + delete footnotePages; + delete virtualSpineItems; + } + std::string& getBasePath() { return contentBasePath; } bool load(); bool clearCache() const; @@ -51,7 +54,7 @@ class Epub { bool trailingNullByte = false) const; bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const; bool getItemSize(const std::string& itemHref, size_t* size) const; - std::string& getSpineItem(int spineIndex); + std::string getSpineItem(int index) const; int getSpineItemsCount() const; size_t getCumulativeSpineItemSize(const int spineIndex) const; EpubTocEntry& getTocItem(int tocIndex); @@ -59,6 +62,13 @@ class Epub { int getSpineIndexForTocIndex(int tocIndex) const; int getTocIndexForSpineIndex(int spineIndex) const; + void markAsFootnotePage(const std::string& href); + bool isFootnotePage(const std::string& filename) const; + bool shouldHideFromToc(int spineIndex) const; + int addVirtualSpineItem(const std::string& path); + bool isVirtualSpineItem(int spineIndex) const; + int findVirtualSpineIndex(const std::string& filename) const; + size_t getBookSize() const; uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead); }; diff --git a/lib/Epub/Epub/FootnoteEntry.h b/lib/Epub/Epub/FootnoteEntry.h new file mode 100644 index 0000000..39f0c26 --- /dev/null +++ b/lib/Epub/Epub/FootnoteEntry.h @@ -0,0 +1,12 @@ +#pragma once + +struct FootnoteEntry { + char number[3]; + char href[64]; + bool isInline; + + FootnoteEntry() : isInline(false) { + number[0] = '\0'; + href[0] = '\0'; + } +}; diff --git a/lib/Epub/Epub/Page.cpp b/lib/Epub/Epub/Page.cpp index 01bb3ac..c886eae 100644 --- a/lib/Epub/Epub/Page.cpp +++ b/lib/Epub/Epub/Page.cpp @@ -12,8 +12,6 @@ void PageLine::render(GfxRenderer& renderer, const int fontId) { block->render(r void PageLine::serialize(std::ostream& os) { serialization::writePod(os, xPos); serialization::writePod(os, yPos); - - // serialize TextBlock pointed to by PageLine block->serialize(os); } @@ -28,21 +26,26 @@ std::unique_ptr PageLine::deserialize(std::istream& is) { } void Page::render(GfxRenderer& renderer, const int fontId) const { - for (auto& element : elements) { - element->render(renderer, fontId); + for (int i = 0; i < elementCount; i++) { + elements[i]->render(renderer, fontId); } } void Page::serialize(std::ostream& os) const { serialization::writePod(os, PAGE_FILE_VERSION); + serialization::writePod(os, static_cast(elementCount)); - const uint32_t count = elements.size(); - serialization::writePod(os, count); - - for (const auto& el : elements) { - // Only PageLine exists currently + for (int i = 0; i < elementCount; i++) { serialization::writePod(os, static_cast(TAG_PageLine)); - el->serialize(os); + elements[i]->serialize(os); + } + + serialization::writePod(os, static_cast(footnoteCount)); + for (int i = 0; i < footnoteCount; i++) { + os.write(footnotes[i].number, 3); + os.write(footnotes[i].href, 64); + uint8_t isInlineFlag = footnotes[i].isInline ? 1 : 0; + os.write(reinterpret_cast(&isInlineFlag), 1); } } @@ -59,18 +62,30 @@ std::unique_ptr Page::deserialize(std::istream& is) { uint32_t count; serialization::readPod(is, count); - for (uint32_t i = 0; i < count; i++) { + for (uint32_t i = 0; i < count && i < page->elementCapacity; i++) { uint8_t tag; serialization::readPod(is, tag); if (tag == TAG_PageLine) { auto pl = PageLine::deserialize(is); - page->elements.push_back(std::move(pl)); + page->addElement(std::move(pl)); } else { Serial.printf("[%lu] [PGE] Deserialization failed: Unknown tag %u\n", millis(), tag); return nullptr; } } + int32_t footnoteCount; + serialization::readPod(is, footnoteCount); + page->footnoteCount = (footnoteCount < page->footnoteCapacity) ? footnoteCount : page->footnoteCapacity; + + for (int i = 0; i < page->footnoteCount; i++) { + is.read(page->footnotes[i].number, 3); + is.read(page->footnotes[i].href, 64); + uint8_t isInlineFlag = 0; + is.read(reinterpret_cast(&isInlineFlag), 1); + page->footnotes[i].isInline = (isInlineFlag != 0); + } + return page; } diff --git a/lib/Epub/Epub/Page.h b/lib/Epub/Epub/Page.h index 59333ce..4510fbf 100644 --- a/lib/Epub/Epub/Page.h +++ b/lib/Epub/Epub/Page.h @@ -1,14 +1,16 @@ #pragma once +#include +#include +#include #include -#include +#include "FootnoteEntry.h" #include "blocks/TextBlock.h" enum PageElementTag : uint8_t { TAG_PageLine = 1, }; -// represents something that has been added to a page class PageElement { public: int16_t xPos; @@ -19,7 +21,6 @@ class PageElement { virtual void serialize(std::ostream& os) = 0; }; -// a line from a block element class PageLine final : public PageElement { std::shared_ptr block; @@ -32,10 +33,68 @@ class PageLine final : public PageElement { }; class Page { + private: + std::shared_ptr* elements; + int elementCapacity; + + FootnoteEntry* footnotes; + int footnoteCapacity; + public: - // the list of block index and line numbers on this page - std::vector> elements; + int elementCount; + int footnoteCount; + + Page() : elementCount(0), footnoteCount(0) { + elementCapacity = 24; + elements = new std::shared_ptr[elementCapacity]; + + footnoteCapacity = 16; + footnotes = new FootnoteEntry[footnoteCapacity]; + for (int i = 0; i < footnoteCapacity; i++) { + footnotes[i].number[0] = '\0'; + footnotes[i].href[0] = '\0'; + } + } + + ~Page() { + delete[] elements; + delete[] footnotes; + } + + Page(const Page&) = delete; + Page& operator=(const Page&) = delete; + + void addElement(std::shared_ptr element) { + if (elementCount < elementCapacity) { + elements[elementCount++] = element; + } + } + + void addFootnote(const char* number, const char* href) { + if (footnoteCount < footnoteCapacity) { + strncpy(footnotes[footnoteCount].number, number, 2); + footnotes[footnoteCount].number[2] = '\0'; + strncpy(footnotes[footnoteCount].href, href, 63); + footnotes[footnoteCount].href[63] = '\0'; + footnoteCount++; + } + } + + std::shared_ptr getElement(int index) const { + if (index >= 0 && index < elementCount) { + return elements[index]; + } + return nullptr; + } + + FootnoteEntry* getFootnote(int index) { + if (index >= 0 && index < footnoteCount) { + return &footnotes[index]; + } + return nullptr; + } + void render(GfxRenderer& renderer, int fontId) const; void serialize(std::ostream& os) const; static std::unique_ptr deserialize(std::istream& is); -}; +}; \ No newline at end of file diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 7c9d241..3bbcb88 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "FsHelpers.h" #include "Page.h" @@ -13,6 +14,61 @@ namespace { constexpr uint8_t SECTION_FILE_VERSION = 5; } +// Helper function to write XML-escaped text directly to file +static bool writeEscapedXml(File& file, const char* text) { + if (!text) return true; + + // Use a static buffer to avoid heap allocation + static char buffer[2048]; + int bufferPos = 0; + + while (*text && bufferPos < sizeof(buffer) - 10) { // Leave margin for entities + unsigned char c = (unsigned char)*text; + + // Only escape the 5 XML special characters + if (c == '<') { + if (bufferPos + 4 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "<", 4); + bufferPos += 4; + } + } else if (c == '>') { + if (bufferPos + 4 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], ">", 4); + bufferPos += 4; + } + } else if (c == '&') { + if (bufferPos + 5 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "&", 5); + bufferPos += 5; + } + } else if (c == '"') { + if (bufferPos + 6 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], """, 6); + bufferPos += 6; + } + } else if (c == '\'') { + if (bufferPos + 6 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "'", 6); + bufferPos += 6; + } + } else { + // Keep everything else (include UTF8) + // This preserves accented characters like é, è, à, etc. + buffer[bufferPos++] = (char)c; + } + + text++; + } + + buffer[bufferPos] = '\0'; + + // Write all at once + size_t written = file.write((const uint8_t*)buffer, bufferPos); + file.flush(); + + return written == bufferPos; +} + void Section::onPageComplete(std::unique_ptr page) { const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin"; @@ -98,7 +154,6 @@ void Section::setupCacheDir() const { SD.mkdir(cachePath.c_str()); } -// Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem) bool Section::clearCache() const { if (!SD.exists(cachePath.c_str())) { Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis()); @@ -119,9 +174,30 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const bool extraParagraphSpacing) { const auto localPath = epub->getSpineItem(spineIndex); - // TODO: Should we get rid of this file all together? - // It currently saves us a bit of memory by allowing for all the inflation bits to be released - // before loading the XML parser + // Check if it's a virtual spine item + if (epub->isVirtualSpineItem(spineIndex)) { + Serial.printf("[%lu] [SCT] Processing virtual spine item: %s\n", millis(), localPath.c_str()); + + const auto sdPath = "/sd" + localPath; + + ChapterHtmlSlimParser visitor( + sdPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight, marginBottom, marginLeft, + extraParagraphSpacing, [this](std::unique_ptr page) { this->onPageComplete(std::move(page)); }, + cachePath); + + bool success = visitor.parseAndBuildPages(); + + if (!success) { + Serial.printf("[%lu] [SCT] Failed to parse virtual file\n", millis()); + return false; + } + + writeCacheMetadata(fontId, lineCompression, marginTop, marginRight, marginBottom, marginLeft, + extraParagraphSpacing); + return true; + } + + // Normal file const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html"; File f = SD.open(tmpHtmlPath.c_str(), FILE_WRITE, true); bool success = epub->readItemContentsToStream(localPath, f, 1024); @@ -136,17 +212,179 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath; - ChapterHtmlSlimParser visitor(sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight, - marginBottom, marginLeft, extraParagraphSpacing, - [this](std::unique_ptr page) { this->onPageComplete(std::move(page)); }); + ChapterHtmlSlimParser visitor( + sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight, marginBottom, marginLeft, + extraParagraphSpacing, [this](std::unique_ptr page) { this->onPageComplete(std::move(page)); }, cachePath); + + // Track which inline footnotes AND paragraph notes are actually referenced in this file + std::set rewrittenInlineIds; + int noterefCount = 0; + + visitor.setNoterefCallback([this, ¬erefCount, &rewrittenInlineIds](Noteref& noteref) { + Serial.printf("[%lu] [SCT] Callback noteref: %s -> %s\n", millis(), noteref.number, noteref.href); + + // Extract the ID from the href for tracking + std::string href(noteref.href); + + // Check if this was rewritten to an inline or paragraph note + if (href.find("inline_") == 0 || href.find("pnote_") == 0) { + size_t underscorePos = href.find('_'); + size_t dotPos = href.find('.'); + + if (underscorePos != std::string::npos && dotPos != std::string::npos) { + std::string noteId = href.substr(underscorePos + 1, dotPos - underscorePos - 1); + rewrittenInlineIds.insert(noteId); + Serial.printf("[%lu] [SCT] Marked note as rewritten: %s\n", millis(), noteId.c_str()); + } + } else { + // Normal external footnote + epub->markAsFootnotePage(noteref.href); + } + + noterefCount++; + }); + + // Parse and build pages (inline hrefs are rewritten automatically inside parser) success = visitor.parseAndBuildPages(); SD.remove(tmpHtmlPath.c_str()); + if (!success) { Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis()); return false; } + // NOW generate inline footnote HTML files ONLY for rewritten ones + Serial.printf("[%lu] [SCT] Found %d inline footnotes, %d were referenced\n", millis(), visitor.inlineFootnoteCount, + rewrittenInlineIds.size()); + + for (int i = 0; i < visitor.inlineFootnoteCount; i++) { + const char* inlineId = visitor.inlineFootnotes[i].id; + const char* inlineText = visitor.inlineFootnotes[i].text; + + // Only generate if this inline footnote was actually referenced + if (rewrittenInlineIds.find(std::string(inlineId)) == rewrittenInlineIds.end()) { + Serial.printf("[%lu] [SCT] Skipping unreferenced inline footnote: %s\n", millis(), inlineId); + continue; + } + + // Verify that the text exists + if (!inlineText || strlen(inlineText) == 0) { + Serial.printf("[%lu] [SCT] Skipping empty inline footnote: %s\n", millis(), inlineId); + continue; + } + + Serial.printf("[%lu] [SCT] Processing inline footnote: %s (len=%d)\n", millis(), inlineId, strlen(inlineText)); + + char inlineFilename[64]; + snprintf(inlineFilename, sizeof(inlineFilename), "inline_%s.html", inlineId); + + // Store in main cache dir, not section cache dir + std::string fullPath = epub->getCachePath() + "/" + std::string(inlineFilename); + + Serial.printf("[%lu] [SCT] Generating inline file: %s\n", millis(), fullPath.c_str()); + + File file = SD.open(fullPath.c_str(), FILE_WRITE, true); + if (file) { + // valid XML declaration and encoding + file.println(""); + file.println(""); + file.println(""); + file.println(""); + file.println(""); + file.println("Footnote"); + file.println(""); + file.println(""); + + // Paragraph with content + file.print("

"); + + if (!writeEscapedXml(file, inlineText)) { + Serial.printf("[%lu] [SCT] Warning: writeEscapedXml may have failed\n", millis()); + } + + file.println("

"); + file.println(""); + file.println(""); + file.close(); + + Serial.printf("[%lu] [SCT] Generated inline footnote file\n", millis()); + + int virtualIndex = epub->addVirtualSpineItem(fullPath); + Serial.printf("[%lu] [SCT] Added virtual spine item at index %d\n", millis(), virtualIndex); + + // Mark as footnote page + char newHref[128]; + snprintf(newHref, sizeof(newHref), "%s#%s", inlineFilename, inlineId); + epub->markAsFootnotePage(newHref); + } else { + Serial.printf("[%lu] [SCT] Failed to create inline file\n", millis()); + } + } + + // Generate paragraph note HTML files + Serial.printf("[%lu] [SCT] Found %d paragraph notes\n", millis(), visitor.paragraphNoteCount); + + for (int i = 0; i < visitor.paragraphNoteCount; i++) { + const char* pnoteId = visitor.paragraphNotes[i].id; + const char* pnoteText = visitor.paragraphNotes[i].text; + + if (!pnoteText || strlen(pnoteText) == 0) { + continue; + } + + // Check if this paragraph note was referenced + if (rewrittenInlineIds.find(std::string(pnoteId)) == rewrittenInlineIds.end()) { + Serial.printf("[%lu] [SCT] Skipping unreferenced paragraph note: %s\n", millis(), pnoteId); + continue; + } + + // Create filename: pnote_rnote1.html + char pnoteFilename[64]; + snprintf(pnoteFilename, sizeof(pnoteFilename), "pnote_%s.html", pnoteId); + + std::string fullPath = epub->getCachePath() + "/" + std::string(pnoteFilename); + + Serial.printf("[%lu] [SCT] Generating paragraph note file: %s\n", millis(), fullPath.c_str()); + + File file = SD.open(fullPath.c_str(), FILE_WRITE, true); + if (file) { + file.println(""); + file.println(""); + file.println(""); + file.println(""); + file.println(""); + file.println("Note"); + file.println(""); + file.println(""); + file.print("

"); + + if (!writeEscapedXml(file, pnoteText)) { + Serial.printf("[%lu] [SCT] Warning: writeEscapedXml may have failed\n", millis()); + } + + file.println("

"); + file.println(""); + file.println(""); + file.close(); + + Serial.printf("[%lu] [SCT] Generated paragraph note file\n", millis()); + + int virtualIndex = epub->addVirtualSpineItem(fullPath); + Serial.printf("[%lu] [SCT] Added virtual spine item at index %d\n", millis(), virtualIndex); + + char newHref[128]; + snprintf(newHref, sizeof(newHref), "%s#%s", pnoteFilename, pnoteId); + epub->markAsFootnotePage(newHref); + } + } + + Serial.printf("[%lu] [SCT] Total noterefs found: %d\n", millis(), noterefCount); + writeCacheMetadata(fontId, lineCompression, marginTop, marginRight, marginBottom, marginLeft, extraParagraphSpacing); return true; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index ea15e1a..7b9eed1 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include "../Page.h" @@ -27,7 +28,6 @@ constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } -// given the start and end of a tag, check to see if it matches a known tag bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { for (int i = 0; i < possible_tag_count; i++) { if (strcmp(tag_name, possible_tags[i]) == 0) { @@ -37,23 +37,204 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } -// start a new text block if needed +const char* getAttribute(const XML_Char** atts, const char* attrName) { + if (!atts) return nullptr; + + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], attrName) == 0) { + return atts[i + 1]; + } + } + return nullptr; +} + void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::BLOCK_STYLE style) { if (currentTextBlock) { - // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { currentTextBlock->setStyle(style); return; } - makePages(); } currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing)); } +void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const char* href) { + if (currentPageFootnoteCount >= 16) return; + + Serial.printf("[%lu] [ADDFT] Adding footnote: num=%s, href=%s\n", millis(), number, href); + + // Copy number + strncpy(currentPageFootnotes[currentPageFootnoteCount].number, number, 2); + currentPageFootnotes[currentPageFootnoteCount].number[2] = '\0'; + + // Check if this is an inline footnote reference + const char* hashPos = strchr(href, '#'); + if (hashPos) { + const char* inlineId = hashPos + 1; // Skip the '#' + + // Check if we have this inline footnote + bool foundInline = false; + for (int i = 0; i < inlineFootnoteCount; i++) { + if (strcmp(inlineFootnotes[i].id, inlineId) == 0) { + // This is an inline footnote! Rewrite the href + char rewrittenHref[64]; + snprintf(rewrittenHref, sizeof(rewrittenHref), "inline_%s.html#%s", inlineId, inlineId); + + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + + Serial.printf("[%lu] [ADDFT] Rewrote inline href to: %s\n", millis(), rewrittenHref); + foundInline = true; + break; + } + } + + // Check if we have this as a paragraph note + if (!foundInline) { + for (int i = 0; i < paragraphNoteCount; i++) { + if (strcmp(paragraphNotes[i].id, inlineId) == 0) { + char rewrittenHref[64]; + snprintf(rewrittenHref, sizeof(rewrittenHref), "pnote_%s.html#%s", inlineId, inlineId); + + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + + Serial.printf("[%lu] [ADDFT] Rewrote paragraph note href to: %s\n", millis(), rewrittenHref); + foundInline = true; + break; + } + } + } + + if (!foundInline) { + // Normal href, just copy it + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + } + } else { + // No anchor, just copy + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + } + + currentPageFootnoteCount++; + + Serial.printf("[%lu] [ADDFT] Stored as: num=%s, href=%s\n", millis(), + currentPageFootnotes[currentPageFootnoteCount - 1].number, + currentPageFootnotes[currentPageFootnoteCount - 1].href); +} + void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); - (void)atts; + + // ============================================================================ + // PASS 1: Detect and collect

+ // ============================================================================ + if (strcmp(name, "p") == 0 && self->isPass1CollectingAsides) { + const char* classAttr = getAttribute(atts, "class"); + + if (classAttr && (strcmp(classAttr, "note") == 0 || strstr(classAttr, "note"))) { + Serial.printf("[%lu] [PNOTE] Found paragraph note (pass1=1)\n", millis()); + + self->insideParagraphNote = true; + self->paragraphNoteDepth = self->depth; + self->currentParagraphNoteTextLen = 0; + self->currentParagraphNoteText[0] = '\0'; + self->currentParagraphNoteId[0] = '\0'; + + self->depth += 1; + return; + } + } + + // Inside paragraph note in Pass 1, look for + if (self->insideParagraphNote && self->isPass1CollectingAsides && strcmp(name, "a") == 0) { + const char* id = getAttribute(atts, "id"); + + if (id && strncmp(id, "rnote", 5) == 0) { + strncpy(self->currentParagraphNoteId, id, 15); + self->currentParagraphNoteId[15] = '\0'; + Serial.printf("[%lu] [PNOTE] Found note ID: %s\n", millis(), id); + } + + self->depth += 1; + return; + } + + // ============================================================================ + // PASS 1: Detect and collect