Add support for paragraph notes

This commit is contained in:
Jérôme Launay 2025-12-17 04:46:27 +01:00
parent 112624d096
commit 62a3092f4a
5 changed files with 271 additions and 28 deletions

View File

@ -214,27 +214,28 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
[this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); }, [this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); },
cachePath); cachePath);
// Track which inline footnotes are actually referenced in this file // Track which inline footnotes AND paragraph notes are actually referenced in this file
std::set<std::string> rewrittenInlineIds; std::set<std::string> rewrittenInlineIds;
int noterefCount = 0; int noterefCount = 0;
visitor.setNoterefCallback([this, &noterefCount, &rewrittenInlineIds](Noteref& noteref) { visitor.setNoterefCallback([this, &noterefCount, &rewrittenInlineIds](Noteref& noteref) {
Serial.printf("[%lu] [SCT] Callback noteref: %s -> %s\n", millis(), noteref.number, noteref.href); Serial.printf("[%lu] [SCT] Callback noteref: %s -> %s\n", millis(), noteref.number, noteref.href);
// Check if this was rewritten to an inline footnote // Extract the ID from the href for tracking
std::string href(noteref.href); std::string href(noteref.href);
if (href.find("inline_") == 0) {
// Extract ID from "inline_N3.html#N3" // Check if this was rewritten to an inline or paragraph note
if (href.find("inline_") == 0 || href.find("pnote_") == 0) {
size_t underscorePos = href.find('_'); size_t underscorePos = href.find('_');
size_t dotPos = href.find('.'); size_t dotPos = href.find('.');
if (underscorePos != std::string::npos && dotPos != std::string::npos) { if (underscorePos != std::string::npos && dotPos != std::string::npos) {
std::string inlineId = href.substr(underscorePos + 1, dotPos - underscorePos - 1); std::string noteId = href.substr(underscorePos + 1, dotPos - underscorePos - 1);
rewrittenInlineIds.insert(inlineId); rewrittenInlineIds.insert(noteId);
Serial.printf("[%lu] [SCT] Marked inline footnote as rewritten: %s\n", Serial.printf("[%lu] [SCT] Marked note as rewritten: %s\n",
millis(), inlineId.c_str()); millis(), noteId.c_str());
} }
} else { }else {
// Normal external footnote // Normal external footnote
epub->markAsFootnotePage(noteref.href); epub->markAsFootnotePage(noteref.href);
} }
@ -312,7 +313,6 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
Serial.printf("[%lu] [SCT] Generated inline footnote file\n", millis()); Serial.printf("[%lu] [SCT] Generated inline footnote file\n", millis());
// Add as virtual spine item (full path for epub to find it)
int virtualIndex = epub->addVirtualSpineItem(fullPath); int virtualIndex = epub->addVirtualSpineItem(fullPath);
Serial.printf("[%lu] [SCT] Added virtual spine item at index %d\n", millis(), virtualIndex); Serial.printf("[%lu] [SCT] Added virtual spine item at index %d\n", millis(), virtualIndex);
@ -325,6 +325,65 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
} }
} }
// Generate paragraph note HTML files
Serial.printf("[%lu] [SCT] Found %d paragraph notes\n", millis(), visitor.paragraphNoteCount);
for (int i = 0; i < visitor.paragraphNoteCount; i++) {
const char* pnoteId = visitor.paragraphNotes[i].id;
const char* pnoteText = visitor.paragraphNotes[i].text;
if (!pnoteText || strlen(pnoteText) == 0) {
continue;
}
// Check if this paragraph note was referenced
if (rewrittenInlineIds.find(std::string(pnoteId)) == rewrittenInlineIds.end()) {
Serial.printf("[%lu] [SCT] Skipping unreferenced paragraph note: %s\n", millis(), pnoteId);
continue;
}
// Create filename: pnote_rnote1.html
char pnoteFilename[64];
snprintf(pnoteFilename, sizeof(pnoteFilename), "pnote_%s.html", pnoteId);
std::string fullPath = epub->getCachePath() + "/" + std::string(pnoteFilename);
Serial.printf("[%lu] [SCT] Generating paragraph note file: %s\n", millis(), fullPath.c_str());
File file = SD.open(fullPath.c_str(), FILE_WRITE, true);
if (file) {
file.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
file.println("<!DOCTYPE html>");
file.println("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
file.println("<head>");
file.println("<meta charset=\"UTF-8\"/>");
file.println("<title>Note</title>");
file.println("</head>");
file.println("<body>");
file.print("<p id=\"");
file.print(pnoteId);
file.print("\">");
if (!writeEscapedXml(file, pnoteText)) {
Serial.printf("[%lu] [SCT] Warning: writeEscapedXml may have failed\n", millis());
}
file.println("</p>");
file.println("</body>");
file.println("</html>");
file.close();
Serial.printf("[%lu] [SCT] Generated paragraph note file\n", millis());
int virtualIndex = epub->addVirtualSpineItem(fullPath);
Serial.printf("[%lu] [SCT] Added virtual spine item at index %d\n", millis(), virtualIndex);
char newHref[128];
snprintf(newHref, sizeof(newHref), "%s#%s", pnoteFilename, pnoteId);
epub->markAsFootnotePage(newHref);
}
}
Serial.printf("[%lu] [SCT] Total noterefs found: %d\n", millis(), noterefCount); Serial.printf("[%lu] [SCT] Total noterefs found: %d\n", millis(), noterefCount);
writeCacheMetadata(fontId, lineCompression, marginTop, marginRight, marginBottom, marginLeft, extraParagraphSpacing); writeCacheMetadata(fontId, lineCompression, marginTop, marginRight, marginBottom, marginLeft, extraParagraphSpacing);
@ -343,4 +402,4 @@ std::unique_ptr<Page> Section::loadPageFromSD() const {
auto page = Page::deserialize(inputFile); auto page = Page::deserialize(inputFile);
inputFile.close(); inputFile.close();
return page; return page;
} }

View File

@ -84,12 +84,29 @@ void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const c
strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63); strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63);
currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0'; currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0';
Serial.printf("[%lu] [ADDFT] Rewrote inline href to: %s\n", millis(), rewrittenHref); Serial.printf("[%lu] [ADDFT] Rewrote inline href to: %s\n", millis(), rewrittenHref);
foundInline = true; foundInline = true;
break; break;
} }
} }
//Check if we have this as a paragraph note
if (!foundInline) {
for (int i = 0; i < paragraphNoteCount; i++) {
if (strcmp(paragraphNotes[i].id, inlineId) == 0) {
char rewrittenHref[64];
snprintf(rewrittenHref, sizeof(rewrittenHref), "pnote_%s.html#%s", inlineId, inlineId);
strncpy(currentPageFootnotes[currentPageFootnoteCount].href, rewrittenHref, 63);
currentPageFootnotes[currentPageFootnoteCount].href[63] = '\0';
Serial.printf("[%lu] [ADDFT] Rewrote paragraph note href to: %s\n", millis(), rewrittenHref);
foundInline = true;
break;
}
}
}
if (!foundInline) { if (!foundInline) {
// Normal href, just copy it // Normal href, just copy it
strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63); strncpy(currentPageFootnotes[currentPageFootnoteCount].href, href, 63);
@ -112,6 +129,43 @@ void ChapterHtmlSlimParser::addFootnoteToCurrentPage(const char* number, const c
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// ============================================================================
// PASS 1: Detect and collect <p class="note">
// ============================================================================
if (strcmp(name, "p") == 0 && self->isPass1CollectingAsides) {
const char* classAttr = getAttribute(atts, "class");
if (classAttr && (strcmp(classAttr, "note") == 0 || strstr(classAttr, "note"))) {
Serial.printf("[%lu] [PNOTE] Found paragraph note (pass1=1)\n", millis());
self->insideParagraphNote = true;
self->paragraphNoteDepth = self->depth;
self->currentParagraphNoteTextLen = 0;
self->currentParagraphNoteText[0] = '\0';
self->currentParagraphNoteId[0] = '\0';
self->depth += 1;
return;
}
}
// Inside paragraph note in Pass 1, look for <a id="rnoteX">
if (self->insideParagraphNote && self->isPass1CollectingAsides && strcmp(name, "a") == 0) {
const char* id = getAttribute(atts, "id");
if (id && strncmp(id, "rnote", 5) == 0) {
strncpy(self->currentParagraphNoteId, id, 15);
self->currentParagraphNoteId[15] = '\0';
Serial.printf("[%lu] [PNOTE] Found note ID: %s\n", millis(), id);
}
self->depth += 1;
return;
}
// ============================================================================
// PASS 1: Detect and collect <aside epub:type="footnote">
// ============================================================================
if (strcmp(name, "aside") == 0) { if (strcmp(name, "aside") == 0) {
const char* epubType = getAttribute(atts, "epub:type"); const char* epubType = getAttribute(atts, "epub:type");
const char* id = getAttribute(atts, "id"); const char* id = getAttribute(atts, "id");
@ -130,8 +184,8 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
strncpy(self->currentAsideId, id, 2); strncpy(self->currentAsideId, id, 2);
self->currentAsideId[2] = '\0'; self->currentAsideId[2] = '\0';
} else { } else {
// Pass 2: Find the aside text and output it as normal content // Pass 2: Skip the aside (we already have it from Pass 1)
Serial.printf("[%lu] [ASIDE] Rendering aside as content in Pass 2: id=%s\n", millis(), id); Serial.printf("[%lu] [ASIDE] Skipping aside in Pass 2: id=%s\n", millis(), id);
// Find the inline footnote text // Find the inline footnote text
for (int i = 0; i < self->inlineFootnoteCount; i++) { for (int i = 0; i < self->inlineFootnoteCount; i++) {
@ -159,18 +213,32 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
} }
} }
// During pass 1, we ONLY collect asides, skip everything else // ============================================================================
// PASS 1: Skip everything else
// ============================================================================
if (self->isPass1CollectingAsides) { if (self->isPass1CollectingAsides) {
self->depth += 1; self->depth += 1;
return; return;
} }
// Pass 2: Normal parsing, but skip asides (we already have them) // ============================================================================
if (self->insideAsideFootnote) { // PASS 2: Skip <p class="note"> (we already have them from Pass 1)
self->depth += 1; // ============================================================================
return; if (strcmp(name, "p") == 0) {
const char* classAttr = getAttribute(atts, "class");
if (classAttr && (strcmp(classAttr, "note") == 0 || strstr(classAttr, "note"))) {
Serial.printf("[%lu] [PNOTE] Skipping paragraph note in Pass 2\n", millis());
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
} }
// ============================================================================
// PASS 2: Normal parsing
// ============================================================================
// Middle of skip // Middle of skip
if (self->skipUntilDepth < self->depth) { if (self->skipUntilDepth < self->depth) {
self->depth += 1; self->depth += 1;
@ -182,7 +250,16 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
const char* epubType = getAttribute(atts, "epub:type"); const char* epubType = getAttribute(atts, "epub:type");
const char* href = getAttribute(atts, "href"); const char* href = getAttribute(atts, "href");
if (epubType && strcmp(epubType, "noteref") == 0) { // Detect epub:type="noteref" OR href="#rnoteX" pattern
bool isNoteref = (epubType && strcmp(epubType, "noteref") == 0);
// Also detect links with href starting with "#rnote" (reverse note pattern)
if (!isNoteref && href && href[0] == '#' && strncmp(href + 1, "rnote", 5) == 0) {
isNoteref = true;
Serial.printf("[%lu] [NOTEREF] Detected reverse note pattern: href=%s\n", millis(), href);
}
if (isNoteref) {
Serial.printf("[%lu] [NOTEREF] Found noteref: href=%s\n", millis(), href ? href : "null"); Serial.printf("[%lu] [NOTEREF] Found noteref: href=%s\n", millis(), href ? href : "null");
self->insideNoteref = true; self->insideNoteref = true;
self->currentNoterefTextLen = 0; self->currentNoterefTextLen = 0;
@ -237,6 +314,32 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) { void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
//Collect paragraph note text in Pass 1
if (self->insideParagraphNote && self->isPass1CollectingAsides) {
for (int i = 0; i < len; i++) {
if (self->currentParagraphNoteTextLen >= self->MAX_PNOTE_BUFFER - 2) {
if (self->currentParagraphNoteTextLen == self->MAX_PNOTE_BUFFER - 2) {
Serial.printf("[%lu] [PNOTE] WARNING: Note text truncated at %d chars\n",
millis(), self->MAX_PNOTE_BUFFER - 2);
}
break;
}
unsigned char c = (unsigned char)s[i];
if (isWhitespace(c)) {
if (self->currentParagraphNoteTextLen > 0 &&
self->currentParagraphNoteText[self->currentParagraphNoteTextLen - 1] != ' ') {
self->currentParagraphNoteText[self->currentParagraphNoteTextLen++] = ' ';
}
} else if (c >= 32 || c >= 0x80) { // Accept printable ASCII AND UTF-8
self->currentParagraphNoteText[self->currentParagraphNoteTextLen++] = c;
}
}
self->currentParagraphNoteText[self->currentParagraphNoteTextLen] = '\0';
return;
}
// If inside aside, collect the text ONLY in pass 1 // If inside aside, collect the text ONLY in pass 1
if (self->insideAsideFootnote) { if (self->insideAsideFootnote) {
if (!self->isPass1CollectingAsides) { if (!self->isPass1CollectingAsides) {
@ -276,8 +379,10 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
// Rest of characterData logic for pass 2... // Rest of characterData logic for pass 2...
if (self->insideNoteref) { if (self->insideNoteref) {
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
if (!isWhitespace(s[i]) && self->currentNoterefTextLen < 15) { unsigned char c = (unsigned char)s[i];
self->currentNoterefText[self->currentNoterefTextLen++] = s[i]; // Skip whitespace and brackets []
if (!isWhitespace(c) && c != '[' && c != ']' && self->currentNoterefTextLen < 15) {
self->currentNoterefText[self->currentNoterefTextLen++] = c;
self->currentNoterefText[self->currentNoterefTextLen] = '\0'; self->currentNoterefText[self->currentNoterefTextLen] = '\0';
} }
} }
@ -320,6 +425,42 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
//Closing paragraph note in Pass 1
if (strcmp(name, "p") == 0 && self->insideParagraphNote &&
self->depth - 1 == self->paragraphNoteDepth) {
if (self->isPass1CollectingAsides &&
self->currentParagraphNoteTextLen > 0 &&
self->paragraphNoteCount < 32 &&
self->currentParagraphNoteId[0] != '\0') {
// Copy ID
strncpy(self->paragraphNotes[self->paragraphNoteCount].id,
self->currentParagraphNoteId, 15);
self->paragraphNotes[self->paragraphNoteCount].id[15] = '\0';
// Allocate memory for text
size_t textLen = strlen(self->currentParagraphNoteText);
self->paragraphNotes[self->paragraphNoteCount].text =
static_cast<char*>(malloc(textLen + 1));
if (self->paragraphNotes[self->paragraphNoteCount].text) {
strcpy(self->paragraphNotes[self->paragraphNoteCount].text,
self->currentParagraphNoteText);
Serial.printf("[%lu] [PNOTE] Stored: %s -> %.80s... (allocated %d bytes)\n",
millis(), self->currentParagraphNoteId,
self->currentParagraphNoteText, textLen + 1);
self->paragraphNoteCount++;
}
}
self->insideParagraphNote = false;
self->depth -= 1;
return;
}
// Closing aside - handle differently for Pass 1 vs Pass 2 // Closing aside - handle differently for Pass 1 vs Pass 2
if (strcmp(name, "aside") == 0 && self->insideAsideFootnote && if (strcmp(name, "aside") == 0 && self->insideAsideFootnote &&
self->depth - 1 == self->asideDepth) { self->depth - 1 == self->asideDepth) {
@ -446,7 +587,9 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
depth = 0; depth = 0;
skipUntilDepth = INT_MAX; skipUntilDepth = INT_MAX;
insideAsideFootnote = false; insideAsideFootnote = false;
insideParagraphNote = false;
inlineFootnoteCount = 0; inlineFootnoteCount = 0;
paragraphNoteCount = 0;
isPass1CollectingAsides = true; isPass1CollectingAsides = true;
XML_Parser parser1 = XML_ParserCreate(nullptr); XML_Parser parser1 = XML_ParserCreate(nullptr);

View File

@ -30,6 +30,26 @@ struct InlineFootnote {
} }
}; };
// One note harvested from a <p class="note"> paragraph: the anchor id plus
// the collected note text.
struct ParagraphNote {
  char id[16];  // Anchor id copied from the <a id="rnoteN"> inside the paragraph
  char* text;   // malloc'd text buffer owned by this object; nullptr when empty

  // Starts out empty: no text buffer and a zero-length id.
  ParagraphNote() : text(nullptr) { id[0] = '\0'; }

  // The destructor frees `text`, so a copy would free the same buffer twice;
  // copying is therefore disabled.
  ParagraphNote(const ParagraphNote&) = delete;
  ParagraphNote& operator=(const ParagraphNote&) = delete;

  // Releases the owned text buffer (free() on a null pointer does nothing).
  ~ParagraphNote() {
    free(text);
    text = nullptr;
  }
};
class ChapterHtmlSlimParser { class ChapterHtmlSlimParser {
const char* filepath; const char* filepath;
GfxRenderer& renderer; GfxRenderer& renderer;
@ -68,6 +88,14 @@ class ChapterHtmlSlimParser {
int asideDepth = 0; int asideDepth = 0;
char currentAsideId[3] = {0}; char currentAsideId[3] = {0};
//Paragraph note tracking
bool insideParagraphNote = false;
int paragraphNoteDepth = 0;
char currentParagraphNoteId[16] = {0};
static constexpr int MAX_PNOTE_BUFFER = 512;
char currentParagraphNoteText[MAX_PNOTE_BUFFER] = {0};
int currentParagraphNoteTextLen = 0;
// Temporary buffer for accumulation, will be copied to dynamic allocation // Temporary buffer for accumulation, will be copied to dynamic allocation
static constexpr int MAX_ASIDE_BUFFER = 2048; static constexpr int MAX_ASIDE_BUFFER = 2048;
char currentAsideText[MAX_ASIDE_BUFFER] = {0}; char currentAsideText[MAX_ASIDE_BUFFER] = {0};
@ -89,9 +117,12 @@ class ChapterHtmlSlimParser {
static void XMLCALL endElement(void* userData, const XML_Char* name); static void XMLCALL endElement(void* userData, const XML_Char* name);
public: public:
// PUBLIC ACCESS to inline footnotes (needed by Section.cpp) // inline footnotes
InlineFootnote inlineFootnotes[16]; InlineFootnote inlineFootnotes[16];
int inlineFootnoteCount = 0; int inlineFootnoteCount = 0;
//paragraph notes
ParagraphNote paragraphNotes[32];
int paragraphNoteCount = 0;
explicit ChapterHtmlSlimParser(const char* filepath, GfxRenderer& renderer, const int fontId, explicit ChapterHtmlSlimParser(const char* filepath, GfxRenderer& renderer, const int fontId,
const float lineCompression, const int marginTop, const int marginRight, const float lineCompression, const int marginTop, const int marginRight,

View File

@ -61,7 +61,7 @@ void EpubReaderFootnotesScreen::render() {
renderer.drawText(READER_FONT_ID, marginLeft, 20, "Footnotes", BOLD); renderer.drawText(READER_FONT_ID, marginLeft, 20, "Footnotes", BOLD);
if (footnotes.getCount() == 0) { if (footnotes.getCount() == 0) {
renderer.drawText(SMALL_FONT_ID, marginLeft, startY, "No footnotes on this page"); renderer.drawText(SMALL_FONT_ID, marginLeft, startY + 20, "No footnotes on this page");
renderer.displayBuffer(); renderer.displayBuffer();
return; return;
} }

View File

@ -488,19 +488,29 @@ void EpubReaderScreen::navigateToHref(const char* href, bool savePosition) {
int targetSpineIndex = -1; int targetSpineIndex = -1;
// FIRST: Check if we have an inline footnote for this anchor // FIRST: Check if we have an inline footnote or paragraph note for this anchor
if (!anchor.empty()) { if (!anchor.empty()) {
// Try inline footnote first
std::string inlineFilename = "inline_" + anchor + ".html"; std::string inlineFilename = "inline_" + anchor + ".html";
Serial.printf("[%lu] [ERS] Looking for inline footnote: %s\n", Serial.printf("[%lu] [ERS] Looking for inline footnote: %s\n",
millis(), inlineFilename.c_str()); millis(), inlineFilename.c_str());
targetSpineIndex = epub->findVirtualSpineIndex(inlineFilename); targetSpineIndex = epub->findVirtualSpineIndex(inlineFilename);
// If not found, try paragraph note
if (targetSpineIndex == -1) {
std::string pnoteFilename = "pnote_" + anchor + ".html";
Serial.printf("[%lu] [ERS] Looking for paragraph note: %s\n",
millis(), pnoteFilename.c_str());
targetSpineIndex = epub->findVirtualSpineIndex(pnoteFilename);
}
if (targetSpineIndex != -1) { if (targetSpineIndex != -1) {
Serial.printf("[%lu] [ERS] Found inline footnote at index: %d\n", Serial.printf("[%lu] [ERS] Found note at virtual index: %d\n",
millis(), targetSpineIndex); millis(), targetSpineIndex);
// Navigate to inline footnote // Navigate to the note
xSemaphoreTake(renderingMutex, portMAX_DELAY); xSemaphoreTake(renderingMutex, portMAX_DELAY);
currentSpineIndex = targetSpineIndex; currentSpineIndex = targetSpineIndex;
nextPageNumber = 0; nextPageNumber = 0;
@ -510,7 +520,7 @@ void EpubReaderScreen::navigateToHref(const char* href, bool savePosition) {
updateRequired = true; updateRequired = true;
return; return;
} else { } else {
Serial.printf("[%lu] [ERS] No inline footnote found, trying normal navigation\n", Serial.printf("[%lu] [ERS] No virtual note found, trying normal navigation\n",
millis()); millis());
} }
} }