Add support for footnote references marked up with <sup> tags.

This commit is contained in:
Uri Tauber 2026-01-25 23:05:31 +02:00
parent 71138a158d
commit 2ad75f3ebe
2 changed files with 135 additions and 66 deletions

View File

@ -281,34 +281,77 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
} }
// Rest of startElement logic for pass 2... // Rest of startElement logic for pass 2...
if (strcmp(name, "sup") == 0) {
self->supDepth = self->depth;
// Case A: Found <sup> inside a normal <a> (which wasn't marked as a note yet)
// Example: <a href="..."><sup>*</sup></a>
if (self->anchorDepth != -1 && !self->insideNoteref) {
Serial.printf("[%lu] [NOTEREF] Found <sup> inside <a>, promoting to noteref\n", millis());
// 1. Flush the current word buffer (text before the sup is normal text)
if (self->partWordBufferIndex > 0) {
// Copy of the existing flush logic
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) fontStyle = EpdFontFamily::BOLD_ITALIC;
else if (self->boldUntilDepth < self->depth) fontStyle = EpdFontFamily::BOLD;
else if (self->italicUntilDepth < self->depth) fontStyle = EpdFontFamily::ITALIC;
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
// 2. Activate footnote mode
self->insideNoteref = true;
self->currentNoterefTextLen = 0;
self->currentNoterefText[0] = '\0';
// Note: The href was already saved to currentNoterefHref when the <a> was opened (see below)
}
}
// === Update the existing A block ===
if (strcmp(name, "a") == 0) { if (strcmp(name, "a") == 0) {
const char* epubType = getAttribute(atts, "epub:type"); const char* epubType = getAttribute(atts, "epub:type");
const char* href = getAttribute(atts, "href"); const char* href = getAttribute(atts, "href");
// Detect epub:type="noteref" OR href="#rnoteX" pattern // Save Anchor state
self->anchorDepth = self->depth;
// Optimistically save the href, in case this becomes a footnote later (via internal <sup>)
if (!self->insideNoteref) {
if (href) {
strncpy(self->currentNoterefHref, href, 127);
self->currentNoterefHref[127] = '\0';
} else {
self->currentNoterefHref[0] = '\0';
}
}
// Footnote detection: via epub:type, rnote pattern, or if we are already inside a <sup>
// Case B: Found <a> inside <sup>
// Example: <sup><a href="...">1</a></sup>
bool isNoteref = (epubType && strcmp(epubType, "noteref") == 0); bool isNoteref = (epubType && strcmp(epubType, "noteref") == 0);
// Also detect links with href starting with "#rnote" (reverse note pattern)
if (!isNoteref && href && href[0] == '#' && strncmp(href + 1, "rnote", 5) == 0) { if (!isNoteref && href && href[0] == '#' && strncmp(href + 1, "rnote", 5) == 0) {
isNoteref = true; isNoteref = true;
Serial.printf("[%lu] [NOTEREF] Detected reverse note pattern: href=%s\n", millis(), href); }
// New detection: if we are inside SUP, this link is a footnote
if (!isNoteref && self->supDepth != -1) {
isNoteref = true;
Serial.printf("[%lu] [NOTEREF] Found <a> inside <sup>, treating as noteref\n", millis());
} }
if (isNoteref) { if (isNoteref) {
// ... (Rest of original isNoteref logic) ...
Serial.printf("[%lu] [NOTEREF] Found noteref: href=%s\n", millis(), href ? href : "null"); Serial.printf("[%lu] [NOTEREF] Found noteref: href=%s\n", millis(), href ? href : "null");
// Flush any pending word before starting noteref collection // Flush word buffer
// This ensures proper word order in the text flow
if (self->partWordBufferIndex > 0) { if (self->partWordBufferIndex > 0) {
// ... (flush logic) ...
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { // ... calculate style ...
fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (self->boldUntilDepth < self->depth) {
fontStyle = EpdFontFamily::BOLD;
} else if (self->italicUntilDepth < self->depth) {
fontStyle = EpdFontFamily::ITALIC;
}
self->partWordBuffer[self->partWordBufferIndex] = '\0'; self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle); self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0; self->partWordBufferIndex = 0;
@ -318,17 +361,6 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
self->currentNoterefTextLen = 0; self->currentNoterefTextLen = 0;
self->currentNoterefText[0] = '\0'; self->currentNoterefText[0] = '\0';
if (href) {
self->currentNoterefHrefLen = 0;
const char* src = href;
while (*src && self->currentNoterefHrefLen < 127) {
self->currentNoterefHref[self->currentNoterefHrefLen++] = *src++;
}
self->currentNoterefHref[self->currentNoterefHrefLen] = '\0';
} else {
self->currentNoterefHref[0] = '\0';
self->currentNoterefHrefLen = 0;
}
self->depth += 1; self->depth += 1;
return; return;
} }
@ -644,56 +676,86 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
return; return;
} }
// Rest of endElement logic for pass 2 - MODIFIED // ---------------------------------------------------------
if (strcmp(name, "a") == 0 && self->insideNoteref) { // PASS 2: Normal Parsing Logic
self->insideNoteref = false; // ---------------------------------------------------------
if (self->currentNoterefTextLen > 0) { // [NEW] 1. Reset Superscript State
Serial.printf("[%lu] [NOTEREF] %s -> %s\n", millis(), self->currentNoterefText, self->currentNoterefHref); // We must ensure we know when we are leaving a <sup> tag
if (strcmp(name, "sup") == 0) {
if (self->supDepth == self->depth) {
self->supDepth = -1;
}
}
// Add footnote first (this does the rewriting) // [MODIFIED] 2. Handle 'a' tags (Anchors/Footnotes)
self->addFootnoteToCurrentPage(self->currentNoterefText, self->currentNoterefHref); // We check "a" generally now, to handle both Noterefs AND resetting regular links
if (strcmp(name, "a") == 0) {
// Then call callback with the REWRITTEN href from currentPageFootnotes // Track if this was a noteref so we can return early later
if (self->noterefCallback && self->currentPageFootnoteCount > 0) { bool wasNoteref = self->insideNoteref;
Noteref noteref;
strncpy(noteref.number, self->currentNoterefText, 15);
noteref.number[15] = '\0';
// Use the STORED href which has been rewritten if (self->insideNoteref) {
FootnoteEntry* lastFootnote = &self->currentPageFootnotes[self->currentPageFootnoteCount - 1]; self->insideNoteref = false;
strncpy(noteref.href, lastFootnote->href, 127);
noteref.href[127] = '\0';
self->noterefCallback(noteref); if (self->currentNoterefTextLen > 0) {
Serial.printf("[%lu] [NOTEREF] %s -> %s\n", millis(), self->currentNoterefText, self->currentNoterefHref);
// Add footnote first (this does the rewriting)
self->addFootnoteToCurrentPage(self->currentNoterefText, self->currentNoterefHref);
// Then call callback with the REWRITTEN href from currentPageFootnotes
if (self->noterefCallback && self->currentPageFootnoteCount > 0) {
Noteref noteref;
strncpy(noteref.number, self->currentNoterefText, 15);
noteref.number[15] = '\0';
// Use the STORED href which has been rewritten
FootnoteEntry* lastFootnote = &self->currentPageFootnotes[self->currentPageFootnoteCount - 1];
strncpy(noteref.href, lastFootnote->href, 127);
noteref.href[127] = '\0';
self->noterefCallback(noteref);
}
// Ensure [1] appears inline after the word it references
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (self->boldUntilDepth < self->depth) {
fontStyle = EpdFontFamily::BOLD;
} else if (self->italicUntilDepth < self->depth) {
fontStyle = EpdFontFamily::ITALIC;
}
// Format the noteref text with brackets
char formattedNoteref[32];
snprintf(formattedNoteref, sizeof(formattedNoteref), "[%s]", self->currentNoterefText);
// Add it as a word to the current text block
if (self->currentTextBlock) {
self->currentTextBlock->addWord(formattedNoteref, fontStyle);
}
} }
// Ensure [1] appears inline after the word it references self->currentNoterefTextLen = 0;
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; self->currentNoterefText[0] = '\0';
if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { self->currentNoterefHrefLen = 0;
fontStyle = EpdFontFamily::BOLD_ITALIC; // Note: We do NOT clear currentNoterefHref here yet, we do it below
} else if (self->boldUntilDepth < self->depth) {
fontStyle = EpdFontFamily::BOLD;
} else if (self->italicUntilDepth < self->depth) {
fontStyle = EpdFontFamily::ITALIC;
}
// Format the noteref text with brackets
char formattedNoteref[32];
snprintf(formattedNoteref, sizeof(formattedNoteref), "[%s]", self->currentNoterefText);
// Add it as a word to the current text block
if (self->currentTextBlock) {
self->currentTextBlock->addWord(formattedNoteref, fontStyle);
}
} }
self->currentNoterefTextLen = 0; // [NEW] Reset Anchor Depth
self->currentNoterefText[0] = '\0'; // This runs for BOTH footnotes and regular links to ensure state is clean
self->currentNoterefHrefLen = 0; if (self->anchorDepth == self->depth) {
self->currentNoterefHref[0] = '\0'; self->anchorDepth = -1;
self->depth -= 1; self->currentNoterefHref[0] = '\0';
return; }
// If it was a noteref, we are done with this tag, return early
if (wasNoteref) {
self->depth -= 1;
return;
}
} }
@ -818,6 +880,9 @@ bool ChapterHtmlSlimParser::parseAndBuildPages() {
currentPageFootnoteCount = 0; currentPageFootnoteCount = 0;
isPass1CollectingAsides = false; isPass1CollectingAsides = false;
supDepth = -1;
anchorDepth = -1;
startNewTextBlock((TextBlock::Style)this->paragraphAlignment); startNewTextBlock((TextBlock::Style)this->paragraphAlignment);
const XML_Parser parser2 = XML_ParserCreate(nullptr); const XML_Parser parser2 = XML_ParserCreate(nullptr);

View File

@ -102,6 +102,10 @@ class ChapterHtmlSlimParser {
// Flag to indicate we're in Pass 1 (collecting asides only) // Flag to indicate we're in Pass 1 (collecting asides only)
bool isPass1CollectingAsides = false; bool isPass1CollectingAsides = false;
// Track superscript depth
int supDepth = -1;
int anchorDepth = -1;
void addFootnoteToCurrentPage(const char* number, const char* href); void addFootnoteToCurrentPage(const char* number, const char* href);
void startNewTextBlock(TextBlock::Style style); void startNewTextBlock(TextBlock::Style style);
void makePages(); void makePages();