diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 7559e3b3..7652ee83 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -208,6 +208,10 @@ bool Epub::parseTocNavFile() const { bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); + if (!footnotePages) { + footnotePages = new std::unordered_set(); + } + // Initialize spine/TOC cache bookMetadataCache.reset(new BookMetadataCache(cachePath)); @@ -528,7 +532,8 @@ int Epub::getSpineItemsCount() const { if (!bookMetadataCache || !bookMetadataCache->isLoaded()) { return 0; } - return bookMetadataCache->getSpineCount(); + int virtualCount = virtualSpineItems ? virtualSpineItems->size() : 0; + return bookMetadataCache->getSpineCount() + virtualCount; } size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { return getSpineItem(spineIndex).cumulativeSize; } @@ -539,6 +544,15 @@ BookMetadataCache::SpineEntry Epub::getSpineItem(const int spineIndex) const { return {}; } + // Virtual spine item + if (isVirtualSpineItem(spineIndex)) { + int virtualIndex = spineIndex - bookMetadataCache->getSpineCount(); + if (virtualSpineItems && virtualIndex >= 0 && virtualIndex < static_cast(virtualSpineItems->size())) { + // Create a dummy spine entry for virtual item + return BookMetadataCache::SpineEntry((*virtualSpineItems)[virtualIndex], 0, -1); + } + } + if (spineIndex < 0 || spineIndex >= bookMetadataCache->getSpineCount()) { Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex); return bookMetadataCache->getSpineEntry(0); @@ -628,6 +642,83 @@ int Epub::getSpineIndexForTextReference() const { return 0; } +void Epub::markAsFootnotePage(const std::string& href) { + // Lazy initialization + if (!footnotePages) { + footnotePages = new std::unordered_set(); + } + + // Extract filename from href (remove #anchor if present) + size_t hashPos = href.find('#'); + std::string filename = (hashPos != std::string::npos) ? href.substr(0, hashPos) : href; + + // Extract just the filename without path + size_t lastSlash = filename.find_last_of('/'); + if (lastSlash != std::string::npos) { + filename = filename.substr(lastSlash + 1); + } + + footnotePages->insert(filename); + Serial.printf("[%lu] [EPUB] Marked as footnote page: %s\n", millis(), filename.c_str()); +} + +bool Epub::isFootnotePage(const std::string& filename) const { + if (!footnotePages) return false; + return footnotePages->find(filename) != footnotePages->end(); +} + +bool Epub::shouldHideFromToc(int spineIndex) const { + // Always hide virtual spine items + if (isVirtualSpineItem(spineIndex)) { + return true; + } + + BookMetadataCache::SpineEntry entry = getSpineItem(spineIndex); + const std::string& spineItem = entry.href; + + // Extract filename from spine item + size_t lastSlash = spineItem.find_last_of('/'); + std::string filename = (lastSlash != std::string::npos) ? spineItem.substr(lastSlash + 1) : spineItem; + + return isFootnotePage(filename); +} + +// Virtual spine items +int Epub::addVirtualSpineItem(const std::string& path) { + // Lazy initialization + if (!virtualSpineItems) { + virtualSpineItems = new std::vector(); + } + + virtualSpineItems->push_back(path); + // Fix: use cache spine count instead of spine.size() + int currentSpineSize = bookMetadataCache ? bookMetadataCache->getSpineCount() : 0; + int newIndex = currentSpineSize + virtualSpineItems->size() - 1; + Serial.printf("[%lu] [EPUB] Added virtual spine item: %s (index %d)\n", millis(), path.c_str(), newIndex); + return newIndex; +} + +bool Epub::isVirtualSpineItem(int spineIndex) const { + int currentSpineSize = bookMetadataCache ? bookMetadataCache->getSpineCount() : 0; + return spineIndex >= currentSpineSize; +} + +int Epub::findVirtualSpineIndex(const std::string& filename) const { + if (!virtualSpineItems) return -1; + int currentSpineSize = bookMetadataCache ? bookMetadataCache->getSpineCount() : 0; + + for (size_t i = 0; i < virtualSpineItems->size(); i++) { + std::string virtualPath = (*virtualSpineItems)[i]; + size_t lastSlash = virtualPath.find_last_of('/'); + std::string virtualFilename = (lastSlash != std::string::npos) ? virtualPath.substr(lastSlash + 1) : virtualPath; + + if (virtualFilename == filename) { + return currentSpineSize + i; + } + } + return -1; +} + // Calculate progress in book (returns 0.0-1.0) float Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) const { const size_t bookSize = getBookSize(); diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 7a21efd5..b6d62f4f 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "Epub/BookMetadataCache.h" @@ -20,22 +21,30 @@ class Epub { std::string filepath; // the base path for items in the EPUB file std::string contentBasePath; - // Uniq cache key based on filepath std::string cachePath; // Spine and TOC cache std::unique_ptr bookMetadataCache; + // Use pointers, allocate only if needed + std::unordered_set* footnotePages; + std::vector* virtualSpineItems; + bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; bool parseTocNavFile() const; public: - explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { - // create a cache key based on the filepath + explicit Epub(std::string filepath, const std::string& cacheDir) + : filepath(std::move(filepath)), footnotePages(nullptr), virtualSpineItems(nullptr) { cachePath = cacheDir + "/epub_" + std::to_string(std::hash{}(this->filepath)); } - ~Epub() = default; + + ~Epub() { + delete footnotePages; + delete virtualSpineItems; + } + std::string& getBasePath() { return contentBasePath; } bool load(bool buildIfMissing = true); bool clearCache() const; @@ -62,6 +71,13 @@ class Epub { size_t getCumulativeSpineItemSize(int spineIndex) const; int getSpineIndexForTextReference() const; + void markAsFootnotePage(const std::string& href); + bool isFootnotePage(const std::string& filename) const; + bool shouldHideFromToc(int spineIndex) const; + int addVirtualSpineItem(const std::string& path); + bool isVirtualSpineItem(int spineIndex) const; + int findVirtualSpineIndex(const std::string& filename) const; + size_t getBookSize() const; float calculateProgress(int currentSpineIndex, float currentSpineRead) const; }; diff --git a/lib/Epub/Epub/FootnoteEntry.h b/lib/Epub/Epub/FootnoteEntry.h new file mode 100644 index 00000000..39f0c264 --- /dev/null +++ b/lib/Epub/Epub/FootnoteEntry.h @@ -0,0 +1,12 @@ +#pragma once + +struct FootnoteEntry { + char number[3]; + char href[64]; + bool isInline; + + FootnoteEntry() : isInline(false) { + number[0] = '\0'; + href[0] = '\0'; + } +}; diff --git a/lib/Epub/Epub/Page.cpp b/lib/Epub/Epub/Page.cpp index 92839eb7..7632e1bb 100644 --- a/lib/Epub/Epub/Page.cpp +++ b/lib/Epub/Epub/Page.cpp @@ -43,6 +43,16 @@ bool Page::serialize(FsFile& file) const { } } + // Serialize footnotes + int32_t fCount = footnotes.size(); + serialization::writePod(file, fCount); + for (const auto& fn : footnotes) { + file.write(fn.number, 3); + file.write(fn.href, 64); + uint8_t isInlineFlag = fn.isInline ? 1 : 0; + file.write(&isInlineFlag, 1); + } + return true; } @@ -65,5 +75,18 @@ std::unique_ptr Page::deserialize(FsFile& file) { } } + int32_t footnoteCount; + serialization::readPod(file, footnoteCount); + + for (int i = 0; i < footnoteCount; i++) { + FootnoteEntry entry; + file.read(entry.number, 3); + file.read(entry.href, 64); + uint8_t isInlineFlag = 0; + file.read(&isInlineFlag, 1); + entry.isInline = (isInlineFlag != 0); + page->footnotes.push_back(entry); + } + return page; } diff --git a/lib/Epub/Epub/Page.h b/lib/Epub/Epub/Page.h index 20061941..291ef1d6 100644 --- a/lib/Epub/Epub/Page.h +++ b/lib/Epub/Epub/Page.h @@ -1,16 +1,15 @@ -#pragma once #include #include #include +#include "FootnoteEntry.h" #include "blocks/TextBlock.h" enum PageElementTag : uint8_t { TAG_PageLine = 1, }; -// represents something that has been added to a page class PageElement { public: int16_t xPos; @@ -21,7 +20,6 @@ class PageElement { virtual bool serialize(FsFile& file) = 0; }; -// a line from a block element class PageLine final : public PageElement { std::shared_ptr block; @@ -37,6 +35,19 @@ class Page { public: // the list of block index and line numbers on this page std::vector> elements; + std::vector footnotes; + + void addFootnote(const char* number, const char* href) { + FootnoteEntry entry; + // ensure null termination and bounds + strncpy(entry.number, number, 2); + entry.number[2] = '\0'; + strncpy(entry.href, href, 63); + entry.href[63] = '\0'; + entry.isInline = false; // Default + footnotes.push_back(entry); + } + void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const; bool serialize(FsFile& file) const; static std::unique_ptr deserialize(FsFile& file); diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 81d688ec..b5297814 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -49,17 +49,25 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s } // namespace -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, + std::unique_ptr footnote) { if (word.empty()) return; words.push_back(std::move(word)); wordStyles.push_back(fontStyle); + if (footnote) { + wordHasFootnote.push_back(1); + footnoteQueue.push_back(*footnote); + } else { + wordHasFootnote.push_back(0); + } } // Consumes data to minimize memory usage -void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const uint16_t viewportWidth, - const std::function)>& processLine, - const bool includeLastLine) { +void ParsedText::layoutAndExtractLines( + const GfxRenderer& renderer, const int fontId, const uint16_t viewportWidth, + const std::function, const std::vector&)>& processLine, + const bool includeLastLine) { if (words.empty()) { return; } @@ -255,8 +263,8 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r return lineBreakIndices; } -// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint fits the -// available width. +// Splits words[wordIndex] into prefix (adding a hyphen only when needed) +// and remainder when a legal breakpoint fits the available width. bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer, const int fontId, std::vector& wordWidths, const bool allowFallbackBreaks) { @@ -320,6 +328,13 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl words.insert(insertWordIt, remainder); wordStyles.insert(insertStyleIt, style); + // Split wordHasFootnote as well. The footnote (if any) is associated with the remainder word. + auto wordHasFootnoteIt = wordHasFootnote.begin(); + std::advance(wordHasFootnoteIt, wordIndex); + uint8_t hasFootnote = *wordHasFootnoteIt; + *wordHasFootnoteIt = 0; // First part doesn't have it anymore + wordHasFootnote.insert(std::next(wordHasFootnoteIt), hasFootnote); + // Update cached widths to reflect the new prefix/remainder pairing. wordWidths[wordIndex] = static_cast(chosenWidth); const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style); @@ -327,9 +342,10 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl return true; } -void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth, - const std::vector& wordWidths, const std::vector& lineBreakIndices, - const std::function)>& processLine) { +void ParsedText::extractLine( + const size_t breakIndex, const int pageWidth, const int spaceWidth, const std::vector& wordWidths, + const std::vector& lineBreakIndices, + const std::function, const std::vector&)>& processLine) { const size_t lineBreak = lineBreakIndices[breakIndex]; const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lineWordCount = lineBreak - lastBreakAt; @@ -372,17 +388,35 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); - // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); + // Extract footnote flags from deque + std::vector lineFootnotes; + for (size_t i = 0; i < lineWordCount; i++) { + if (!wordHasFootnote.empty()) { + uint8_t hasFn = wordHasFootnote.front(); + wordHasFootnote.pop_front(); + + if (hasFn) { + if (footnoteQueue.empty()) { + Serial.printf("[%lu] [ERROR] Footnote flag set but queue empty! Flags/queue out of sync.\n", millis()); + break; + } + lineFootnotes.push_back(footnoteQueue.front()); + footnoteQueue.pop_front(); + } + } + } + for (auto& word : lineWords) { if (containsSoftHyphen(word)) { stripSoftHyphensInPlace(word); } } - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); -} \ No newline at end of file + processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style), + lineFootnotes); +} diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index e72db7ef..30e1c5be 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -2,12 +2,14 @@ #include +#include #include #include #include #include #include +#include "FootnoteEntry.h" #include "blocks/TextBlock.h" class GfxRenderer; @@ -15,6 +17,8 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; + std::deque wordHasFootnote; + std::deque footnoteQueue; TextBlock::Style style; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -26,9 +30,10 @@ class ParsedText { int spaceWidth, std::vector& wordWidths); bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId, std::vector& wordWidths, bool allowFallbackBreaks); - void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector& wordWidths, - const std::vector& lineBreakIndices, - const std::function)>& processLine); + void extractLine( + size_t breakIndex, int pageWidth, int spaceWidth, const std::vector& wordWidths, + const std::vector& lineBreakIndices, + const std::function, const std::vector&)>& processLine); std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: @@ -37,12 +42,13 @@ class ParsedText { : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; - void addWord(std::string word, EpdFontFamily::Style fontStyle); + void addWord(std::string word, EpdFontFamily::Style fontStyle, std::unique_ptr footnote = nullptr); void setStyle(const TextBlock::Style style) { this->style = style; } TextBlock::Style getStyle() const { return style; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } - void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, - const std::function)>& processLine, - bool includeLastLine = true); -}; \ No newline at end of file + void layoutAndExtractLines( + const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, + const std::function, const std::vector&)>& processLine, + bool includeLastLine = true); +}; diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 581a364f..5409b826 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -3,6 +3,10 @@ #include #include +#include +#include + +#include "FsHelpers.h" #include "Page.h" #include "hyphenation/Hyphenator.h" #include "parsers/ChapterHtmlSlimParser.h" @@ -14,6 +18,60 @@ constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(uint32_t); } // namespace +// Helper function to write XML-escaped text directly to file +static bool writeEscapedXml(FsFile& file, const char* text) { + if (!text) return true; + + // Use a static buffer to avoid heap allocation + static char buffer[2048]; + int bufferPos = 0; + + while (*text && bufferPos < sizeof(buffer) - 10) { // Leave margin for entities + unsigned char c = (unsigned char)*text; + + // Only escape the 5 XML special characters + if (c == '<') { + if (bufferPos + 4 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "<", 4); + bufferPos += 4; + } + } else if (c == '>') { + if (bufferPos + 4 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], ">", 4); + bufferPos += 4; + } + } else if (c == '&') { + if (bufferPos + 5 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "&", 5); + bufferPos += 5; + } + } else if (c == '"') { + if (bufferPos + 6 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], """, 6); + bufferPos += 6; + } + } else if (c == '\'') { + if (bufferPos + 6 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "'", 6); + bufferPos += 6; + } + } else { + // Keep everything else (include UTF8) + buffer[bufferPos++] = (char)c; + } + + text++; + } + + buffer[bufferPos] = '\0'; + + // Write all at once + size_t written = file.write((const uint8_t*)buffer, bufferPos); + file.flush(); + + return written == bufferPos; +} + uint32_t Section::onPageComplete(std::unique_ptr page) { if (!file) { Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount); @@ -25,7 +83,8 @@ uint32_t Section::onPageComplete(std::unique_ptr page) { Serial.printf("[%lu] [SCT] Failed to serialize page %d\n", millis(), pageCount); return 0; } - Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount); + // Debug reduce log spam + // Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount); pageCount++; return position; @@ -104,7 +163,6 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con return true; } -// Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem) bool Section::clearCache() const { if (!SdMan.exists(filePath.c_str())) { Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis()); @@ -126,7 +184,9 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c const std::function& progressSetupFn, const std::function& progressFn) { constexpr uint32_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB - const auto localPath = epub->getSpineItem(spineIndex).href; + + BookMetadataCache::SpineEntry spineEntry = epub->getSpineItem(spineIndex); + const std::string localPath = spineEntry.href; const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html"; // Create cache directory if it doesn't exist @@ -135,43 +195,43 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c SdMan.mkdir(sectionsDir.c_str()); } - // Retry logic for SD card timing issues + bool isVirtual = epub->isVirtualSpineItem(spineIndex); bool success = false; uint32_t fileSize = 0; - for (int attempt = 0; attempt < 3 && !success; attempt++) { - if (attempt > 0) { - Serial.printf("[%lu] [SCT] Retrying stream (attempt %d)...\n", millis(), attempt + 1); - delay(50); // Brief delay before retry + std::string fileToParse = tmpHtmlPath; + + if (isVirtual) { + Serial.printf("[%lu] [SCT] Processing virtual spine item: %s\n", millis(), localPath.c_str()); + // For virtual items, the path is already on SD, e.g. /sd/cache/... + // But we need to make sure the parser can read it. + // If it starts with /sd/, we might need to strip it if using SdFat with root? + // Assuming absolute path is fine. + fileToParse = localPath; + success = true; + fileSize = 0; // Don't check size for progress bar on virtual items + } else { + // Normal file - stream from zip + for (int attempt = 0; attempt < 3 && !success; attempt++) { + if (attempt > 0) delay(50); + + if (SdMan.exists(tmpHtmlPath.c_str())) SdMan.remove(tmpHtmlPath.c_str()); + + FsFile tmpHtml; + if (!SdMan.openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) continue; + success = epub->readItemContentsToStream(localPath, tmpHtml, 1024); + fileSize = tmpHtml.size(); + tmpHtml.close(); + + if (!success && SdMan.exists(tmpHtmlPath.c_str())) SdMan.remove(tmpHtmlPath.c_str()); } - // Remove any incomplete file from previous attempt before retrying - if (SdMan.exists(tmpHtmlPath.c_str())) { - SdMan.remove(tmpHtmlPath.c_str()); - } - - FsFile tmpHtml; - if (!SdMan.openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) { - continue; - } - success = epub->readItemContentsToStream(localPath, tmpHtml, 1024); - fileSize = tmpHtml.size(); - tmpHtml.close(); - - // If streaming failed, remove the incomplete file immediately - if (!success && SdMan.exists(tmpHtmlPath.c_str())) { - SdMan.remove(tmpHtmlPath.c_str()); - Serial.printf("[%lu] [SCT] Removed incomplete temp file after failed attempt\n", millis()); + if (!success) { + Serial.printf("[%lu] [SCT] Failed to stream item contents\n", millis()); + return false; } } - if (!success) { - Serial.printf("[%lu] [SCT] Failed to stream item contents to temp file after retries\n", millis()); - return false; - } - - Serial.printf("[%lu] [SCT] Streamed temp HTML to %s (%d bytes)\n", millis(), tmpHtmlPath.c_str(), fileSize); - - // Only show progress bar for larger chapters where rendering overhead is worth it + // Only show progress bar for larger chapters if (progressSetupFn && fileSize >= MIN_SIZE_FOR_PROGRESS) { progressSetupFn(); } @@ -183,15 +243,44 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c viewportHeight, hyphenationEnabled); std::vector lut = {}; - ChapterHtmlSlimParser visitor( - tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, + std::unique_ptr visitor(new ChapterHtmlSlimParser( + fileToParse, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn); - Hyphenator::setPreferredLanguage(epub->getLanguage()); - success = visitor.parseAndBuildPages(); + progressFn)); + + Hyphenator::setPreferredLanguage(epub->getLanguage()); + + // Track which inline footnotes AND paragraph notes are actually referenced in this file + std::set rewrittenInlineIds; + int noterefCount = 0; + + visitor->setNoterefCallback([this, ¬erefCount, &rewrittenInlineIds](Noteref& noteref) { + // Extract the ID from the href for tracking + std::string href(noteref.href); + + // Check if this was rewritten to an inline or paragraph note + if (href.find("inline_") == 0 || href.find("pnote_") == 0) { + size_t underscorePos = href.find('_'); + size_t dotPos = href.find('.'); + + if (underscorePos != std::string::npos && dotPos != std::string::npos) { + std::string noteId = href.substr(underscorePos + 1, dotPos - underscorePos - 1); + rewrittenInlineIds.insert(noteId); + } + } else { + // Normal external footnote + epub->markAsFootnotePage(noteref.href); + } + noterefCount++; + }); + + success = visitor->parseAndBuildPages(); + + if (!isVirtual) { + SdMan.remove(tmpHtmlPath.c_str()); + } - SdMan.remove(tmpHtmlPath.c_str()); if (!success) { Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis()); file.close(); @@ -199,9 +288,77 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c return false; } + // --- Footnote Generation Logic (Merged from HEAD) --- + + // Inline footnotes + for (int i = 0; i < visitor->inlineFootnoteCount; i++) { + const char* inlineId = visitor->inlineFootnotes[i].id; + const char* inlineText = visitor->inlineFootnotes[i].text; + + if (rewrittenInlineIds.find(std::string(inlineId)) == rewrittenInlineIds.end()) continue; + if (!inlineText || strlen(inlineText) == 0) continue; + + char inlineFilename[64]; + snprintf(inlineFilename, sizeof(inlineFilename), "inline_%s.html", inlineId); + std::string fullPath = epub->getCachePath() + "/" + std::string(inlineFilename); + + FsFile file; + if (SdMan.openFileForWrite("SCT", fullPath, file)) { + file.println(""); + file.println(""); + file.println(""); + file.println("Footnote"); + file.println(""); + file.print("

"); + writeEscapedXml(file, inlineText); + file.println("

"); + file.close(); + + int virtualIndex = epub->addVirtualSpineItem(fullPath); + char newHref[128]; + snprintf(newHref, sizeof(newHref), "%s#%s", inlineFilename, inlineId); + epub->markAsFootnotePage(newHref); + } + } + + // Paragraph notes + for (int i = 0; i < visitor->paragraphNoteCount; i++) { + const char* pnoteId = visitor->paragraphNotes[i].id; + const char* pnoteText = visitor->paragraphNotes[i].text; + + if (!pnoteText || strlen(pnoteText) == 0) continue; + if (rewrittenInlineIds.find(std::string(pnoteId)) == rewrittenInlineIds.end()) continue; + + char pnoteFilename[64]; + snprintf(pnoteFilename, sizeof(pnoteFilename), "pnote_%s.html", pnoteId); + std::string fullPath = epub->getCachePath() + "/" + std::string(pnoteFilename); + + FsFile file; + if (SdMan.openFileForWrite("SCT", fullPath, file)) { + file.println(""); + file.println(""); + file.println(""); + file.println("Note"); + file.println(""); + file.print("

"); + writeEscapedXml(file, pnoteText); + file.println("

"); + file.close(); + + int virtualIndex = epub->addVirtualSpineItem(fullPath); + char newHref[128]; + snprintf(newHref, sizeof(newHref), "%s#%s", pnoteFilename, pnoteId); + epub->markAsFootnotePage(newHref); + } + } + + // Write LUT (master) const uint32_t lutOffset = file.position(); bool hasFailedLutRecords = false; - // Write LUT for (const uint32_t& pos : lut) { if (pos == 0) { hasFailedLutRecords = true; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 298c4ec6..5dd83130 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -30,7 +30,6 @@ constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } -// given the start and end of a tag, check to see if it matches a known tag bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { for (int i = 0; i < possible_tag_count; i++) { if (strcmp(tag_name, possible_tags[i]) == 0) { @@ -40,46 +39,332 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +const char* getAttribute(const XML_Char** atts, const char* attrName) { + if (!atts) return nullptr; + + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], attrName) == 0) { + return atts[i + 1]; + } + } + return nullptr; +} + +// Simple HTML entity replacement for noteref text +std::string replaceHtmlEntities(const char* text) { + if (!text) return ""; + std::string s(text); + + // Replace common entities + size_t pos = 0; + while ((pos = s.find("<", pos)) != std::string::npos) { + s.replace(pos, 4, "<"); + pos += 1; + } + pos = 0; + while ((pos = s.find(">", pos)) != std::string::npos) { + s.replace(pos, 4, ">"); + pos += 1; + } + pos = 0; + while ((pos = s.find("&", pos)) != std::string::npos) { + s.replace(pos, 5, "&"); + pos += 1; + } + pos = 0; + while ((pos = s.find(""", pos)) != std::string::npos) { + s.replace(pos, 6, "\""); + pos += 1; + } + pos = 0; + while ((pos = s.find("'", pos)) != std::string::npos) { + s.replace(pos, 6, "'"); + pos += 1; + } + return s; +} + +EpdFontFamily::Style ChapterHtmlSlimParser::getCurrentFontStyle() const { + if (boldUntilDepth < depth && italicUntilDepth < depth) { + return EpdFontFamily::BOLD_ITALIC; + } else if (boldUntilDepth < depth) { + return EpdFontFamily::BOLD; + } else if (italicUntilDepth < depth) { + return EpdFontFamily::ITALIC; + } + return EpdFontFamily::REGULAR; +} + // flush the contents of partWordBuffer to currentTextBlock void ChapterHtmlSlimParser::flushPartWordBuffer() { - // determine font style - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (boldUntilDepth < depth && italicUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (boldUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (italicUntilDepth < depth) { - fontStyle = EpdFontFamily::ITALIC; - } + EpdFontFamily::Style fontStyle = getCurrentFontStyle(); // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; - currentTextBlock->addWord(partWordBuffer, fontStyle); + currentTextBlock->addWord(std::move(replaceHtmlEntities(partWordBuffer)), fontStyle); partWordBufferIndex = 0; } // start a new text block if needed void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { if (currentTextBlock) { - // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { currentTextBlock->setStyle(style); return; } - makePages(); } currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); } +std::unique_ptr ChapterHtmlSlimParser::createFootnoteEntry(const char* number, const char* href) { + auto entry = std::unique_ptr(new FootnoteEntry()); + + Serial.printf("[%lu] [ADDFT] Creating footnote: num=%s, href=%s\n", millis(), number, href); + + // Copy number + strncpy(entry->number, number, 2); + entry->number[2] = '\0'; + + // Check if this is an inline footnote reference + const char* hashPos = strchr(href, '#'); + if (hashPos) { + const char* inlineId = hashPos + 1; // Skip the '#' + + // Check if we have this inline footnote + bool foundInline = false; + for (int i = 0; i < inlineFootnoteCount; i++) { + if (strcmp(inlineFootnotes[i].id, inlineId) == 0) { + // This is an inline footnote! Rewrite the href + char rewrittenHref[64]; + snprintf(rewrittenHref, sizeof(rewrittenHref), "inline_%s.html#%s", inlineId, inlineId); + + strncpy(entry->href, rewrittenHref, 63); + entry->href[63] = '\0'; + + Serial.printf("[%lu] [ADDFT] Rewrote inline href to: %s\n", millis(), rewrittenHref); + foundInline = true; + break; + } + } + + // Check if we have this as a paragraph note + if (!foundInline) { + for (int i = 0; i < paragraphNoteCount; i++) { + if (strcmp(paragraphNotes[i].id, inlineId) == 0) { + char rewrittenHref[64]; + snprintf(rewrittenHref, sizeof(rewrittenHref), "pnote_%s.html#%s", inlineId, inlineId); + + strncpy(entry->href, rewrittenHref, 63); + entry->href[63] = '\0'; + + Serial.printf("[%lu] [ADDFT] Rewrote paragraph note href to: %s\n", millis(), rewrittenHref); + foundInline = true; + break; + } + } + } + + if (!foundInline) { + // Normal href, just copy it + strncpy(entry->href, href, 63); + entry->href[63] = '\0'; + } + } else { + // No anchor, just copy + strncpy(entry->href, href, 63); + entry->href[63] = '\0'; + } + + Serial.printf("[%lu] [ADDFT] Created as: num=%s, href=%s\n", millis(), entry->number, entry->href); + return entry; +} + void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); + // ============================================================================ + // PASS 1: Detect and collect

+ // ============================================================================ + if (strcmp(name, "p") == 0 && self->isPass1CollectingAsides) { + const char* classAttr = getAttribute(atts, "class"); + + if (classAttr && (strcmp(classAttr, "note") == 0 || strstr(classAttr, "note"))) { + Serial.printf("[%lu] [PNOTE] Found paragraph note (pass1=1)\n", millis()); + + self->insideParagraphNote = true; + self->paragraphNoteDepth = self->depth; + self->currentParagraphNoteTextLen = 0; + self->currentParagraphNoteText[0] = '\0'; + self->currentParagraphNoteId[0] = '\0'; + + self->depth += 1; + return; + } + } + + // Inside paragraph note in Pass 1, look for + if (self->insideParagraphNote && self->isPass1CollectingAsides && strcmp(name, "a") == 0) { + const char* id = getAttribute(atts, "id"); + + if (id && strncmp(id, "rnote", 5) == 0) { + strncpy(self->currentParagraphNoteId, id, 15); + self->currentParagraphNoteId[15] = '\0'; + Serial.printf("[%lu] [PNOTE] Found note ID: %s\n", millis(), id); + } + + self->depth += 1; + return; + } + + // ============================================================================ + // PASS 1: Detect and collect