From 712c566664577020a7ee382531346d51c464c35f Mon Sep 17 00:00:00 2001 From: Dave Allie Date: Wed, 28 Jan 2026 03:33:36 +1100 Subject: [PATCH] fix: Correctly render italics on image alt placeholders (#569) ## Summary * Correctly render italics on image alt placeholders * Parser incorrectly handled depth of self-closing tags * Self-closing tags immediately call start and end tag ## Additional Context * Previously, it would incorrectly make the whole chapter bold/italics, or not italicised the image alt --- ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? No --- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 86 ++++++++++++------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index f6d96be4..298c4ec6 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -84,41 +84,42 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* if (strcmp(name, "table") == 0) { // Add placeholder text self->startNewTextBlock(TextBlock::CENTER_ALIGN); - if (self->currentTextBlock) { - self->currentTextBlock->addWord("[Table omitted]", EpdFontFamily::ITALIC); - } - // Skip table contents - self->skipUntilDepth = self->depth; + self->italicUntilDepth = min(self->italicUntilDepth, self->depth); + // Advance depth before processing character data (like you would for a element with text) self->depth += 1; + self->characterData(userData, "[Table omitted]", strlen("[Table omitted]")); + + // Skip table contents (skip until parent as we pre-advanced depth above) + self->skipUntilDepth = self->depth - 1; return; } if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) { // TODO: Start processing image tags - std::string alt; + std::string alt = "[Image]"; if (atts != nullptr) { for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "alt") == 0) { - // add " " (counts as whitespace) at the end of alt - // so the corresponding text block ends. - // TODO: A zero-width breaking space would be more appropriate (once/if we support it) - alt = "[Image: " + std::string(atts[i + 1]) + "] "; + if (strlen(atts[i + 1]) > 0) { + alt = "[Image: " + std::string(atts[i + 1]) + "]"; + } + break; } } - Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); - - self->startNewTextBlock(TextBlock::CENTER_ALIGN); - self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - self->depth += 1; - self->characterData(userData, alt.c_str(), alt.length()); - return; - } else { - // Skip for now - self->skipUntilDepth = self->depth; - self->depth += 1; - return; } + + Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); + + self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->italicUntilDepth = min(self->italicUntilDepth, self->depth); + // Advance depth before processing character data (like you would for a element with text) + self->depth += 1; + self->characterData(userData, alt.c_str(), alt.length()); + + // Skip table contents (skip until parent as we pre-advanced depth above) + self->skipUntilDepth = self->depth - 1; + return; } if (matches(name, SKIP_TAGS, NUM_SKIP_TAGS)) { @@ -143,25 +144,43 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { self->startNewTextBlock(TextBlock::CENTER_ALIGN); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); - } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { + self->depth += 1; + return; + } + + if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { if (self->partWordBufferIndex > 0) { // flush word preceding
to currentTextBlock before calling startNewTextBlock self->flushPartWordBuffer(); } self->startNewTextBlock(self->currentTextBlock->getStyle()); - } else { - self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); - if (strcmp(name, "li") == 0) { - self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); - } + self->depth += 1; + return; } - } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { - self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); - } else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { - self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); + + self->startNewTextBlock(static_cast(self->paragraphAlignment)); + if (strcmp(name, "li") == 0) { + self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); + } + + self->depth += 1; + return; } + if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { + self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + self->depth += 1; + return; + } + + if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { + self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); + self->depth += 1; + return; + } + + // Unprocessed tag, just increasing depth and continue forward self->depth += 1; } @@ -227,7 +246,8 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n // text styling needs to be overhauled to fix it. const bool shouldBreakText = matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; + matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; if (shouldBreakText) { self->flushPartWordBuffer();