diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 7559e3b3..7652ee83 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -208,6 +208,10 @@ bool Epub::parseTocNavFile() const { bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); + if (!footnotePages) { + footnotePages = new std::unordered_set(); + } + // Initialize spine/TOC cache bookMetadataCache.reset(new BookMetadataCache(cachePath)); @@ -528,7 +532,8 @@ int Epub::getSpineItemsCount() const { if (!bookMetadataCache || !bookMetadataCache->isLoaded()) { return 0; } - return bookMetadataCache->getSpineCount(); + int virtualCount = virtualSpineItems ? virtualSpineItems->size() : 0; + return bookMetadataCache->getSpineCount() + virtualCount; } size_t Epub::getCumulativeSpineItemSize(const int spineIndex) const { return getSpineItem(spineIndex).cumulativeSize; } @@ -539,6 +544,15 @@ BookMetadataCache::SpineEntry Epub::getSpineItem(const int spineIndex) const { return {}; } + // Virtual spine item + if (isVirtualSpineItem(spineIndex)) { + int virtualIndex = spineIndex - bookMetadataCache->getSpineCount(); + if (virtualSpineItems && virtualIndex >= 0 && virtualIndex < static_cast(virtualSpineItems->size())) { + // Create a dummy spine entry for virtual item + return BookMetadataCache::SpineEntry((*virtualSpineItems)[virtualIndex], 0, -1); + } + } + if (spineIndex < 0 || spineIndex >= bookMetadataCache->getSpineCount()) { Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex); return bookMetadataCache->getSpineEntry(0); @@ -628,6 +642,83 @@ int Epub::getSpineIndexForTextReference() const { return 0; } +void Epub::markAsFootnotePage(const std::string& href) { + // Lazy initialization + if (!footnotePages) { + footnotePages = new std::unordered_set(); + } + + // Extract filename from href (remove #anchor if present) + size_t hashPos = href.find('#'); + std::string filename = (hashPos != std::string::npos) ? href.substr(0, hashPos) : href; + + // Extract just the filename without path + size_t lastSlash = filename.find_last_of('/'); + if (lastSlash != std::string::npos) { + filename = filename.substr(lastSlash + 1); + } + + footnotePages->insert(filename); + Serial.printf("[%lu] [EPUB] Marked as footnote page: %s\n", millis(), filename.c_str()); +} + +bool Epub::isFootnotePage(const std::string& filename) const { + if (!footnotePages) return false; + return footnotePages->find(filename) != footnotePages->end(); +} + +bool Epub::shouldHideFromToc(int spineIndex) const { + // Always hide virtual spine items + if (isVirtualSpineItem(spineIndex)) { + return true; + } + + BookMetadataCache::SpineEntry entry = getSpineItem(spineIndex); + const std::string& spineItem = entry.href; + + // Extract filename from spine item + size_t lastSlash = spineItem.find_last_of('/'); + std::string filename = (lastSlash != std::string::npos) ? spineItem.substr(lastSlash + 1) : spineItem; + + return isFootnotePage(filename); +} + +// Virtual spine items +int Epub::addVirtualSpineItem(const std::string& path) { + // Lazy initialization + if (!virtualSpineItems) { + virtualSpineItems = new std::vector(); + } + + virtualSpineItems->push_back(path); + // Fix: use cache spine count instead of spine.size() + int currentSpineSize = bookMetadataCache ? bookMetadataCache->getSpineCount() : 0; + int newIndex = currentSpineSize + virtualSpineItems->size() - 1; + Serial.printf("[%lu] [EPUB] Added virtual spine item: %s (index %d)\n", millis(), path.c_str(), newIndex); + return newIndex; +} + +bool Epub::isVirtualSpineItem(int spineIndex) const { + int currentSpineSize = bookMetadataCache ? bookMetadataCache->getSpineCount() : 0; + return spineIndex >= currentSpineSize; +} + +int Epub::findVirtualSpineIndex(const std::string& filename) const { + if (!virtualSpineItems) return -1; + int currentSpineSize = bookMetadataCache ? bookMetadataCache->getSpineCount() : 0; + + for (size_t i = 0; i < virtualSpineItems->size(); i++) { + std::string virtualPath = (*virtualSpineItems)[i]; + size_t lastSlash = virtualPath.find_last_of('/'); + std::string virtualFilename = (lastSlash != std::string::npos) ? virtualPath.substr(lastSlash + 1) : virtualPath; + + if (virtualFilename == filename) { + return currentSpineSize + i; + } + } + return -1; +} + // Calculate progress in book (returns 0.0-1.0) float Epub::calculateProgress(const int currentSpineIndex, const float currentSpineRead) const { const size_t bookSize = getBookSize(); diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 7a21efd5..b6d62f4f 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "Epub/BookMetadataCache.h" @@ -20,22 +21,30 @@ class Epub { std::string filepath; // the base path for items in the EPUB file std::string contentBasePath; - // Uniq cache key based on filepath std::string cachePath; // Spine and TOC cache std::unique_ptr bookMetadataCache; + // Use pointers, allocate only if needed + std::unordered_set* footnotePages; + std::vector* virtualSpineItems; + bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; bool parseTocNavFile() const; public: - explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { - // create a cache key based on the filepath + explicit Epub(std::string filepath, const std::string& cacheDir) + : filepath(std::move(filepath)), footnotePages(nullptr), virtualSpineItems(nullptr) { cachePath = cacheDir + "/epub_" + std::to_string(std::hash{}(this->filepath)); } - ~Epub() = default; + + ~Epub() { + delete footnotePages; + delete virtualSpineItems; + } + std::string& getBasePath() { return contentBasePath; } bool load(bool buildIfMissing = true); bool clearCache() const; @@ -62,6 +71,13 @@ class Epub { size_t getCumulativeSpineItemSize(int spineIndex) const; int getSpineIndexForTextReference() const; + void markAsFootnotePage(const std::string& href); + bool isFootnotePage(const std::string& filename) const; + bool shouldHideFromToc(int spineIndex) const; + int addVirtualSpineItem(const std::string& path); + bool isVirtualSpineItem(int spineIndex) const; + int findVirtualSpineIndex(const std::string& filename) const; + size_t getBookSize() const; float calculateProgress(int currentSpineIndex, float currentSpineRead) const; }; diff --git a/lib/Epub/Epub/FootnoteEntry.h b/lib/Epub/Epub/FootnoteEntry.h new file mode 100644 index 00000000..39f0c264 --- /dev/null +++ b/lib/Epub/Epub/FootnoteEntry.h @@ -0,0 +1,12 @@ +#pragma once + +struct FootnoteEntry { + char number[3]; + char href[64]; + bool isInline; + + FootnoteEntry() : isInline(false) { + number[0] = '\0'; + href[0] = '\0'; + } +}; diff --git a/lib/Epub/Epub/Page.cpp b/lib/Epub/Epub/Page.cpp index 92839eb7..7632e1bb 100644 --- a/lib/Epub/Epub/Page.cpp +++ b/lib/Epub/Epub/Page.cpp @@ -43,6 +43,16 @@ bool Page::serialize(FsFile& file) const { } } + // Serialize footnotes + int32_t fCount = footnotes.size(); + serialization::writePod(file, fCount); + for (const auto& fn : footnotes) { + file.write(fn.number, 3); + file.write(fn.href, 64); + uint8_t isInlineFlag = fn.isInline ? 1 : 0; + file.write(&isInlineFlag, 1); + } + return true; } @@ -65,5 +75,18 @@ std::unique_ptr Page::deserialize(FsFile& file) { } } + int32_t footnoteCount; + serialization::readPod(file, footnoteCount); + + for (int i = 0; i < footnoteCount; i++) { + FootnoteEntry entry; + file.read(entry.number, 3); + file.read(entry.href, 64); + uint8_t isInlineFlag = 0; + file.read(&isInlineFlag, 1); + entry.isInline = (isInlineFlag != 0); + page->footnotes.push_back(entry); + } + return page; } diff --git a/lib/Epub/Epub/Page.h b/lib/Epub/Epub/Page.h index 20061941..291ef1d6 100644 --- a/lib/Epub/Epub/Page.h +++ b/lib/Epub/Epub/Page.h @@ -1,16 +1,15 @@ -#pragma once #include #include #include +#include "FootnoteEntry.h" #include "blocks/TextBlock.h" enum PageElementTag : uint8_t { TAG_PageLine = 1, }; -// represents something that has been added to a page class PageElement { public: int16_t xPos; @@ -21,7 +20,6 @@ class PageElement { virtual bool serialize(FsFile& file) = 0; }; -// a line from a block element class PageLine final : public PageElement { std::shared_ptr block; @@ -37,6 +35,19 @@ class Page { public: // the list of block index and line numbers on this page std::vector> elements; + std::vector footnotes; + + void addFootnote(const char* number, const char* href) { + FootnoteEntry entry; + // ensure null termination and bounds + strncpy(entry.number, number, 2); + entry.number[2] = '\0'; + strncpy(entry.href, href, 63); + entry.href[63] = '\0'; + entry.isInline = false; // Default + footnotes.push_back(entry); + } + void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const; bool serialize(FsFile& file) const; static std::unique_ptr deserialize(FsFile& file); diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 581a364f..5409b826 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -3,6 +3,10 @@ #include #include +#include +#include + +#include "FsHelpers.h" #include "Page.h" #include "hyphenation/Hyphenator.h" #include "parsers/ChapterHtmlSlimParser.h" @@ -14,6 +18,60 @@ constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(uint32_t); } // namespace +// Helper function to write XML-escaped text directly to file +static bool writeEscapedXml(FsFile& file, const char* text) { + if (!text) return true; + + // Use a static buffer to avoid heap allocation + static char buffer[2048]; + int bufferPos = 0; + + while (*text && bufferPos < sizeof(buffer) - 10) { // Leave margin for entities + unsigned char c = (unsigned char)*text; + + // Only escape the 5 XML special characters + if (c == '<') { + if (bufferPos + 4 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "<", 4); + bufferPos += 4; + } + } else if (c == '>') { + if (bufferPos + 4 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], ">", 4); + bufferPos += 4; + } + } else if (c == '&') { + if (bufferPos + 5 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "&", 5); + bufferPos += 5; + } + } else if (c == '"') { + if (bufferPos + 6 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], """, 6); + bufferPos += 6; + } + } else if (c == '\'') { + if (bufferPos + 6 < sizeof(buffer)) { + memcpy(&buffer[bufferPos], "'", 6); + bufferPos += 6; + } + } else { + // Keep everything else (include UTF8) + buffer[bufferPos++] = (char)c; + } + + text++; + } + + buffer[bufferPos] = '\0'; + + // Write all at once + size_t written = file.write((const uint8_t*)buffer, bufferPos); + file.flush(); + + return written == bufferPos; +} + uint32_t Section::onPageComplete(std::unique_ptr page) { if (!file) { Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount); @@ -25,7 +83,8 @@ uint32_t Section::onPageComplete(std::unique_ptr page) { Serial.printf("[%lu] [SCT] Failed to serialize page %d\n", millis(), pageCount); return 0; } - Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount); + // Debug reduce log spam + // Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount); pageCount++; return position; @@ -104,7 +163,6 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con return true; } -// Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem) bool Section::clearCache() const { if (!SdMan.exists(filePath.c_str())) { Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis()); @@ -126,7 +184,9 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c const std::function& progressSetupFn, const std::function& progressFn) { constexpr uint32_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB - const auto localPath = epub->getSpineItem(spineIndex).href; + + BookMetadataCache::SpineEntry spineEntry = epub->getSpineItem(spineIndex); + const std::string localPath = spineEntry.href; const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html"; // Create cache directory if it doesn't exist @@ -135,43 +195,43 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c SdMan.mkdir(sectionsDir.c_str()); } - // Retry logic for SD card timing issues + bool isVirtual = epub->isVirtualSpineItem(spineIndex); bool success = false; uint32_t fileSize = 0; - for (int attempt = 0; attempt < 3 && !success; attempt++) { - if (attempt > 0) { - Serial.printf("[%lu] [SCT] Retrying stream (attempt %d)...\n", millis(), attempt + 1); - delay(50); // Brief delay before retry + std::string fileToParse = tmpHtmlPath; + + if (isVirtual) { + Serial.printf("[%lu] [SCT] Processing virtual spine item: %s\n", millis(), localPath.c_str()); + // For virtual items, the path is already on SD, e.g. /sd/cache/... + // But we need to make sure the parser can read it. + // If it starts with /sd/, we might need to strip it if using SdFat with root? + // Assuming absolute path is fine. + fileToParse = localPath; + success = true; + fileSize = 0; // Don't check size for progress bar on virtual items + } else { + // Normal file - stream from zip + for (int attempt = 0; attempt < 3 && !success; attempt++) { + if (attempt > 0) delay(50); + + if (SdMan.exists(tmpHtmlPath.c_str())) SdMan.remove(tmpHtmlPath.c_str()); + + FsFile tmpHtml; + if (!SdMan.openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) continue; + success = epub->readItemContentsToStream(localPath, tmpHtml, 1024); + fileSize = tmpHtml.size(); + tmpHtml.close(); + + if (!success && SdMan.exists(tmpHtmlPath.c_str())) SdMan.remove(tmpHtmlPath.c_str()); } - // Remove any incomplete file from previous attempt before retrying - if (SdMan.exists(tmpHtmlPath.c_str())) { - SdMan.remove(tmpHtmlPath.c_str()); - } - - FsFile tmpHtml; - if (!SdMan.openFileForWrite("SCT", tmpHtmlPath, tmpHtml)) { - continue; - } - success = epub->readItemContentsToStream(localPath, tmpHtml, 1024); - fileSize = tmpHtml.size(); - tmpHtml.close(); - - // If streaming failed, remove the incomplete file immediately - if (!success && SdMan.exists(tmpHtmlPath.c_str())) { - SdMan.remove(tmpHtmlPath.c_str()); - Serial.printf("[%lu] [SCT] Removed incomplete temp file after failed attempt\n", millis()); + if (!success) { + Serial.printf("[%lu] [SCT] Failed to stream item contents\n", millis()); + return false; } } - if (!success) { - Serial.printf("[%lu] [SCT] Failed to stream item contents to temp file after retries\n", millis()); - return false; - } - - Serial.printf("[%lu] [SCT] Streamed temp HTML to %s (%d bytes)\n", millis(), tmpHtmlPath.c_str(), fileSize); - - // Only show progress bar for larger chapters where rendering overhead is worth it + // Only show progress bar for larger chapters if (progressSetupFn && fileSize >= MIN_SIZE_FOR_PROGRESS) { progressSetupFn(); } @@ -183,15 +243,44 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c viewportHeight, hyphenationEnabled); std::vector lut = {}; - ChapterHtmlSlimParser visitor( - tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, + std::unique_ptr visitor(new ChapterHtmlSlimParser( + fileToParse, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn); - Hyphenator::setPreferredLanguage(epub->getLanguage()); - success = visitor.parseAndBuildPages(); + progressFn)); + + Hyphenator::setPreferredLanguage(epub->getLanguage()); + + // Track which inline footnotes AND paragraph notes are actually referenced in this file + std::set rewrittenInlineIds; + int noterefCount = 0; + + visitor->setNoterefCallback([this, ¬erefCount, &rewrittenInlineIds](Noteref& noteref) { + // Extract the ID from the href for tracking + std::string href(noteref.href); + + // Check if this was rewritten to an inline or paragraph note + if (href.find("inline_") == 0 || href.find("pnote_") == 0) { + size_t underscorePos = href.find('_'); + size_t dotPos = href.find('.'); + + if (underscorePos != std::string::npos && dotPos != std::string::npos) { + std::string noteId = href.substr(underscorePos + 1, dotPos - underscorePos - 1); + rewrittenInlineIds.insert(noteId); + } + } else { + // Normal external footnote + epub->markAsFootnotePage(noteref.href); + } + noterefCount++; + }); + + success = visitor->parseAndBuildPages(); + + if (!isVirtual) { + SdMan.remove(tmpHtmlPath.c_str()); + } - SdMan.remove(tmpHtmlPath.c_str()); if (!success) { Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis()); file.close(); @@ -199,9 +288,77 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c return false; } + // --- Footnote Generation Logic (Merged from HEAD) --- + + // Inline footnotes + for (int i = 0; i < visitor->inlineFootnoteCount; i++) { + const char* inlineId = visitor->inlineFootnotes[i].id; + const char* inlineText = visitor->inlineFootnotes[i].text; + + if (rewrittenInlineIds.find(std::string(inlineId)) == rewrittenInlineIds.end()) continue; + if (!inlineText || strlen(inlineText) == 0) continue; + + char inlineFilename[64]; + snprintf(inlineFilename, sizeof(inlineFilename), "inline_%s.html", inlineId); + std::string fullPath = epub->getCachePath() + "/" + std::string(inlineFilename); + + FsFile file; + if (SdMan.openFileForWrite("SCT", fullPath, file)) { + file.println(""); + file.println(""); + file.println(""); + file.println("Footnote"); + file.println(""); + file.print("

"); + writeEscapedXml(file, inlineText); + file.println("

"); + file.close(); + + int virtualIndex = epub->addVirtualSpineItem(fullPath); + char newHref[128]; + snprintf(newHref, sizeof(newHref), "%s#%s", inlineFilename, inlineId); + epub->markAsFootnotePage(newHref); + } + } + + // Paragraph notes + for (int i = 0; i < visitor->paragraphNoteCount; i++) { + const char* pnoteId = visitor->paragraphNotes[i].id; + const char* pnoteText = visitor->paragraphNotes[i].text; + + if (!pnoteText || strlen(pnoteText) == 0) continue; + if (rewrittenInlineIds.find(std::string(pnoteId)) == rewrittenInlineIds.end()) continue; + + char pnoteFilename[64]; + snprintf(pnoteFilename, sizeof(pnoteFilename), "pnote_%s.html", pnoteId); + std::string fullPath = epub->getCachePath() + "/" + std::string(pnoteFilename); + + FsFile file; + if (SdMan.openFileForWrite("SCT", fullPath, file)) { + file.println(""); + file.println(""); + file.println(""); + file.println("Note"); + file.println(""); + file.print("

"); + writeEscapedXml(file, pnoteText); + file.println("

"); + file.close(); + + int virtualIndex = epub->addVirtualSpineItem(fullPath); + char newHref[128]; + snprintf(newHref, sizeof(newHref), "%s#%s", pnoteFilename, pnoteId); + epub->markAsFootnotePage(newHref); + } + } + + // Write LUT (master) const uint32_t lutOffset = file.position(); bool hasFailedLutRecords = false; - // Write LUT for (const uint32_t& pos : lut) { if (pos == 0) { hasFailedLutRecords = true; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index f6d96be4..7fd9fd18 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -30,7 +30,6 @@ constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } -// given the start and end of a tag, check to see if it matches a known tag bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { for (int i = 0; i < possible_tag_count; i++) { if (strcmp(tag_name, possible_tags[i]) == 0) { @@ -40,46 +39,359 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +const char* getAttribute(const XML_Char** atts, const char* attrName) { + if (!atts) return nullptr; + + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], attrName) == 0) { + return atts[i + 1]; + } + } + return nullptr; +} + +// Simple HTML entity replacement for noteref text +std::string replaceHtmlEntities(const char* text) { + if (!text) return ""; + std::string s(text); + + // Replace common entities + size_t pos = 0; + while ((pos = s.find("<", pos)) != std::string::npos) { + s.replace(pos, 4, "<"); + pos += 1; + } + pos = 0; + while ((pos = s.find(">", pos)) != std::string::npos) { + s.replace(pos, 4, ">"); + pos += 1; + } + pos = 0; + while ((pos = s.find("&", pos)) != std::string::npos) { + s.replace(pos, 5, "&"); + pos += 1; + } + pos = 0; + while ((pos = s.find(""", pos)) != std::string::npos) { + s.replace(pos, 6, "\""); + pos += 1; + } + pos = 0; + while ((pos = s.find("'", pos)) != std::string::npos) { + s.replace(pos, 6, "'"); + pos += 1; + } + return s; +} + +EpdFontFamily::Style ChapterHtmlSlimParser::getCurrentFontStyle() const { + if (boldUntilDepth < depth && italicUntilDepth < depth) { + return EpdFontFamily::BOLD_ITALIC; + } else if (boldUntilDepth < depth) { + return EpdFontFamily::BOLD; + } else if (italicUntilDepth < depth) { + return EpdFontFamily::ITALIC; + } + return EpdFontFamily::REGULAR; +} + // flush the contents of partWordBuffer to currentTextBlock void ChapterHtmlSlimParser::flushPartWordBuffer() { - // determine font style - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (boldUntilDepth < depth && italicUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (boldUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (italicUntilDepth < depth) { - fontStyle = EpdFontFamily::ITALIC; - } + EpdFontFamily::Style fontStyle = getCurrentFontStyle(); // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; - currentTextBlock->addWord(partWordBuffer, fontStyle); + currentTextBlock->addWord(std::move(replaceHtmlEntities(partWordBuffer)), fontStyle); partWordBufferIndex = 0; } // start a new text block if needed void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { if (currentTextBlock) { - // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { currentTextBlock->setStyle(style); return; } - makePages(); } currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); } +void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const char* href) { + if (currentPageFootnoteCount >= 16) return; + + Serial.printf("[%lu] [ADDFT] Adding footnote: num=%s, href=%s\n", millis(), number, href); + + // Copy number + strncpy(currentPageFootnotes[currentPageFootnoteCount].number, number, 2); + currentPageFootnotes[currentPageFootnoteCount].number[2] = '\0'; + + // Check if this is an inline footnote reference + const char* hashPos = strchr(href, '#'); + if (hashPos) { + const char* inlineId = hashPos + 1; // Skip the '#' + + // Check if we have this inline footnote + bool foundInline = false; + for (int i = 0; i < inlineFootnoteCount; i++) { + if (strcmp(inlineFootnotes[i].id, inlineId) == 0) { + // This is an inline footnote! Rewrite the href + char rewrittenHref[64]; + snprintf(rewrittenHref, sizeof(rewrittenHref), "inline_%s.html#%s", inlineId, inlineId); + + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + + Serial.printf("[%lu] [ADDFT] Rewrote inline href to: %s\n", millis(), rewrittenHref); + foundInline = true; + break; + } + } + + // Check if we have this as a paragraph note + if (!foundInline) { + for (int i = 0; i < paragraphNoteCount; i++) { + if (strcmp(paragraphNotes[i].id, inlineId) == 0) { + char rewrittenHref[64]; + snprintf(rewrittenHref, sizeof(rewrittenHref), "pnote_%s.html#%s", inlineId, inlineId); + + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + + Serial.printf("[%lu] [ADDFT] Rewrote paragraph note href to: %s\n", millis(), rewrittenHref); + foundInline = true; + break; + } + } + } + + if (!foundInline) { + // Normal href, just copy it + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + } + } else { + // No anchor, just copy + strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); + currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + } + + currentPageFootnoteCount++; + + Serial.printf("[%lu] [ADDFT] Stored as: num=%s, href=%s\n", millis(), + currentPageFootnotes[currentPageFootnoteCount - 1].number, + currentPageFootnotes[currentPageFootnoteCount - 1].href); +} + void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); + // ============================================================================ + // PASS 1: Detect and collect

+ // ============================================================================ + if (strcmp(name, "p") == 0 && self->isPass1CollectingAsides) { + const char* classAttr = getAttribute(atts, "class"); + + if (classAttr && (strcmp(classAttr, "note") == 0 || strstr(classAttr, "note"))) { + Serial.printf("[%lu] [PNOTE] Found paragraph note (pass1=1)\n", millis()); + + self->insideParagraphNote = true; + self->paragraphNoteDepth = self->depth; + self->currentParagraphNoteTextLen = 0; + self->currentParagraphNoteText[0] = '\0'; + self->currentParagraphNoteId[0] = '\0'; + + self->depth += 1; + return; + } + } + + // Inside paragraph note in Pass 1, look for + if (self->insideParagraphNote && self->isPass1CollectingAsides && strcmp(name, "a") == 0) { + const char* id = getAttribute(atts, "id"); + + if (id && strncmp(id, "rnote", 5) == 0) { + strncpy(self->currentParagraphNoteId, id, 15); + self->currentParagraphNoteId[15] = '\0'; + Serial.printf("[%lu] [PNOTE] Found note ID: %s\n", millis(), id); + } + + self->depth += 1; + return; + } + + // ============================================================================ + // PASS 1: Detect and collect