From c51d61736847524c23851e8e4f890c3d393c023f Mon Sep 17 00:00:00 2001 From: Uri Tauber Date: Fri, 30 Jan 2026 10:52:37 +0200 Subject: [PATCH] Fix footnote placement for paragraphs spanning multiple pages --- lib/Epub/Epub/ParsedText.cpp | 50 ++++++++++-- lib/Epub/Epub/ParsedText.h | 14 +++- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 78 +++++++++---------- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 8 +- 4 files changed, 90 insertions(+), 60 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 81d688ec..b44baee5 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -49,16 +49,24 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s } // namespace -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, + std::unique_ptr footnote) { if (word.empty()) return; words.push_back(std::move(word)); wordStyles.push_back(fontStyle); + if (footnote) { + wordHasFootnote.push_back(1); + footnoteQueue.push_back(*footnote); + } else { + wordHasFootnote.push_back(0); + } } // Consumes data to minimize memory usage void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const uint16_t viewportWidth, - const std::function)>& processLine, + const std::function, + const std::vector&)>& processLine, const bool includeLastLine) { if (words.empty()) { return; @@ -255,8 +263,8 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r return lineBreakIndices; } -// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint fits the -// available width. +// Splits words[wordIndex] into prefix (adding a hyphen only when needed) +// and remainder when a legal breakpoint fits the available width. bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer, const int fontId, std::vector& wordWidths, const bool allowFallbackBreaks) { @@ -320,6 +328,13 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl words.insert(insertWordIt, remainder); wordStyles.insert(insertStyleIt, style); + // Split wordHasFootnote as well. The footnote (if any) is associated with the remainder word. + auto wordHasFootnoteIt = wordHasFootnote.begin(); + std::advance(wordHasFootnoteIt, wordIndex); + uint8_t hasFootnote = *wordHasFootnoteIt; + *wordHasFootnoteIt = 0; // First part doesn't have it anymore + wordHasFootnote.insert(std::next(wordHasFootnoteIt), hasFootnote); + // Update cached widths to reflect the new prefix/remainder pairing. wordWidths[wordIndex] = static_cast(chosenWidth); const uint16_t remainderWidth = measureWordWidth(renderer, fontId, remainder, style); @@ -329,7 +344,8 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth, const std::vector& wordWidths, const std::vector& lineBreakIndices, - const std::function)>& processLine) { + const std::function, const std::vector&)>& + processLine) { const size_t lineBreak = lineBreakIndices[breakIndex]; const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lineWordCount = lineBreak - lastBreakAt; @@ -372,17 +388,35 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); - // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); + // Extract footnote flags from deque + std::vector lineFootnotes; + for (size_t i = 0; i < lineWordCount; i++) { + if (!wordHasFootnote.empty()) { + uint8_t hasFn = wordHasFootnote.front(); + wordHasFootnote.pop_front(); + + if (hasFn) { + if (footnoteQueue.empty()) { + Serial.printf("[%lu] [ERROR] Footnote flag set but queue empty! Flags/queue out of sync.\n", millis()); + break; + } + lineFootnotes.push_back(footnoteQueue.front()); + footnoteQueue.pop_front(); + } + } + } + for (auto& word : lineWords) { if (containsSoftHyphen(word)) { stripSoftHyphensInPlace(word); } } - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); -} \ No newline at end of file + processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style), + lineFootnotes); +} diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index e72db7ef..f771a197 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -2,12 +2,14 @@ #include +#include #include #include #include #include #include +#include "FootnoteEntry.h" #include "blocks/TextBlock.h" class GfxRenderer; @@ -15,6 +17,8 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; + std::deque wordHasFootnote; + std::deque footnoteQueue; TextBlock::Style style; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -28,7 +32,8 @@ class ParsedText { std::vector& wordWidths, bool allowFallbackBreaks); void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector& wordWidths, const std::vector& lineBreakIndices, - const std::function)>& processLine); + const std::function, const std::vector&)>& + processLine); std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: @@ -37,12 +42,13 @@ class ParsedText { : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; - void addWord(std::string word, EpdFontFamily::Style fontStyle); + void addWord(std::string word, EpdFontFamily::Style fontStyle, std::unique_ptr footnote = nullptr); void setStyle(const TextBlock::Style style) { this->style = style; } TextBlock::Style getStyle() const { return style; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, - const std::function)>& processLine, + const std::function, const std::vector&)>& + processLine, bool includeLastLine = true); -}; \ No newline at end of file +}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 728f7203..72c2c673 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -116,14 +116,14 @@ void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); } -void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const char* href) { - if (currentPageFootnoteCount >= 16) return; +std::unique_ptr ChapterHtmlSlimParser::createFootnoteEntry(const char* number, const char* href) { + auto entry = std::unique_ptr(new FootnoteEntry()); - Serial.printf("[%lu] [ADDFT] Adding footnote: num=%s, href=%s\n", millis(), number, href); + Serial.printf("[%lu] [ADDFT] Creating footnote: num=%s, href=%s\n", millis(), number, href); // Copy number - strncpy(currentPageFootnotes[currentPageFootnoteCount].number, number, 2); - currentPageFootnotes[currentPageFootnoteCount].number[2] = '\0'; + strncpy(entry->number, number, 2); + entry->number[2] = '\0'; // Check if this is an inline footnote reference const char* hashPos = strchr(href, '#'); @@ -138,8 +138,8 @@ void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const c char rewrittenHref[64]; snprintf(rewrittenHref, sizeof(rewrittenHref), "inline_%s.html#%s", inlineId, inlineId); - strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); - currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + strncpy(entry->href, rewrittenHref, 63); + entry->href[63] = '\0'; Serial.printf("[%lu] [ADDFT] Rewrote inline href to: %s\n", millis(), rewrittenHref); foundInline = true; @@ -154,8 +154,8 @@ void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const c char rewrittenHref[64]; snprintf(rewrittenHref, sizeof(rewrittenHref), "pnote_%s.html#%s", inlineId, inlineId); - strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); - currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + strncpy(entry->href, rewrittenHref, 63); + entry->href[63] = '\0'; Serial.printf("[%lu] [ADDFT] Rewrote paragraph note href to: %s\n", millis(), rewrittenHref); foundInline = true; @@ -166,20 +166,17 @@ void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const c if (!foundInline) { // Normal href, just copy it - strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); - currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + strncpy(entry->href, href, 63); + entry->href[63] = '\0'; } } else { // No anchor, just copy - strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); - currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; + strncpy(entry->href, href, 63); + entry->href[63] = '\0'; } - currentPageFootnoteCount++; - - Serial.printf("[%lu] [ADDFT] Stored as: num=%s, href=%s\n", millis(), - currentPageFootnotes[currentPageFootnoteCount - 1].number, - currentPageFootnotes[currentPageFootnoteCount - 1].href); + Serial.printf("[%lu] [ADDFT] Created as: num=%s, href=%s\n", millis(), entry->number, entry->href); + return entry; } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { @@ -593,7 +590,10 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char Serial.printf("[%lu] [EHP] Text block too long, splitting into multiple pages\n", millis()); self->currentTextBlock->layoutAndExtractLines( self->renderer, self->fontId, self->viewportWidth, - [self](const std::shared_ptr& textBlock) { self->addLineToPage(textBlock); }, false); + [self](const std::shared_ptr& textBlock, const std::vector& footnotes) { + self->addLineToPage(textBlock, footnotes); + }, + false); } } @@ -688,18 +688,17 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n if (self->currentNoterefTextLen > 0) { Serial.printf("[%lu] [NOTEREF] %s -> %s\n", millis(), self->currentNoterefText, self->currentNoterefHref); - // Add footnote first (this does the rewriting) - self->addFootnoteToCurrentPage(self->currentNoterefText, self->currentNoterefHref); + // Create the footnote entry (this does the rewriting) + std::unique_ptr footnote = + self->createFootnoteEntry(self->currentNoterefText, self->currentNoterefHref); - // Then call callback with the REWRITTEN href from currentPageFootnotes - if (self->noterefCallback && self->currentPageFootnoteCount > 0) { + // Then call callback with the REWRITTEN href + if (self->noterefCallback && footnote) { Noteref noteref; strncpy(noteref.number, self->currentNoterefText, 15); noteref.number[15] = '\0'; - // Use the STORED href which has been rewritten - FootnoteEntry* lastFootnote = &self->currentPageFootnotes[self->currentPageFootnoteCount - 1]; - strncpy(noteref.href, lastFootnote->href, 127); + strncpy(noteref.href, footnote->href, 127); noteref.href[127] = '\0'; self->noterefCallback(noteref); @@ -712,9 +711,9 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n char formattedNoteref[32]; snprintf(formattedNoteref, sizeof(formattedNoteref), "[%s]", self->currentNoterefText); - // Add it as a word to the current text block + // Add it as a word to the current text block with the footnote attached if (self->currentTextBlock) { - self->currentTextBlock->addWord(formattedNoteref, fontStyle); + self->currentTextBlock->addWord(formattedNoteref, fontStyle, std::move(footnote)); } } @@ -846,7 +845,6 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() { partWordBufferIndex = 0; insideNoteref = false; insideAsideFootnote = false; - currentPageFootnoteCount = 0; isPass1CollectingAsides = false; supDepth = -1; @@ -928,10 +926,6 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() { makePages(); if (currentPage) { - for (int i = 0; i < currentPageFootnoteCount; i++) { - currentPage->addFootnote(currentPageFootnotes[i].number, currentPageFootnotes[i].href); - } - currentPageFootnoteCount = 0; completePageFn(std::move(currentPage)); } @@ -942,17 +936,10 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() { return true; } -void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line) { +void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line, const std::vector& footnotes) { const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; if (currentPageNextY + lineHeight > viewportHeight) { - if (currentPage) { - for (int i = 0; i < currentPageFootnoteCount; i++) { - currentPage->addFootnote(currentPageFootnotes[i].number, currentPageFootnotes[i].href); - } - currentPageFootnoteCount = 0; - } - completePageFn(std::move(currentPage)); currentPage.reset(new Page()); currentPageNextY = 0; @@ -961,6 +948,11 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line) { if (currentPage && currentPage->elements.size() < 24) { // Assuming generic capacity check or vector size currentPage->elements.push_back(std::make_shared(line, 0, currentPageNextY)); currentPageNextY += lineHeight; + + // Add footnotes for this line to the current page + for (const auto& fn : footnotes) { + currentPage->addFootnote(fn.number, fn.href); + } } else if (currentPage) { Serial.printf("[%lu] [EHP] WARNING: Page element capacity reached, skipping element\n", millis()); } @@ -980,7 +972,9 @@ void ChapterHtmlSlimParser::makePages() { const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; currentTextBlock->layoutAndExtractLines( renderer, fontId, viewportWidth, - [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); + [this](const std::shared_ptr& textBlock, const std::vector& footnotes) { + addLineToPage(textBlock, footnotes); + }); // Extra paragraph spacing if enabled if (extraParagraphSpacing) { currentPageNextY += lineHeight / 2; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index ed66b713..474f18bf 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -77,10 +77,6 @@ class ChapterHtmlSlimParser { int currentNoterefHrefLen = 0; std::function noterefCallback = nullptr; - // Footnote tracking for current page - FootnoteEntry currentPageFootnotes[16]; - int currentPageFootnoteCount = 0; - // Inline footnotes (aside) tracking bool insideAsideFootnote = false; int asideDepth = 0; @@ -106,7 +102,7 @@ class ChapterHtmlSlimParser { int supDepth = -1; int anchorDepth = -1; - void addFootnoteToCurrentPage(const char* number, const char* href); + std::unique_ptr createFootnoteEntry(const char* number, const char* href); void startNewTextBlock(TextBlock::Style style); EpdFontFamily::Style getCurrentFontStyle() const; void flushPartWordBuffer(); @@ -161,7 +157,7 @@ class ChapterHtmlSlimParser { } bool parseAndBuildPages(); - void addLineToPage(std::shared_ptr line); + void addLineToPage(std::shared_ptr line, const std::vector& footnotes); void setNoterefCallback(const std::function& callback) { noterefCallback = callback; } };