From bf6cf83577c0229b3712a20f875fc12f271fbae1 Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Tue, 27 Jan 2026 03:07:02 -0800 Subject: [PATCH] fix: line break (#525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary * Fixes #519 * Refactors repeated code into new function: `ChapterHtmlSlimParser::flushPartWordBuffer()` ## Additional Context * The `
` tag is self closing and _in-line_, so the existing logic for closing block tags does not get applied to `
` tags. * This PR adds the _in-line_ logic to: * Flush the word preceding the `
` tag from `partWordBuffer` to `currentTextBlock` before calling `startNewTextBlock` * **New function**: `ChapterHtmlSlimParser::flushPartWordBuffer()` * **Purpose**: Consolidates the logic for flushing `partWordBuffer` to `currentTextBlock` * **Impact**: Simplifies `ChapterHtmlSlimParser::characterData(…)`, `ChapterHtmlSlimParser::startElement(…)`, and `ChapterHtmlSlimParser::endElement(…)` by integrating reused code into single function --- ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? _**NO**_ --- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 51 +++++++++---------- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 1 + 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 1d7e2ab3..53359179 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -40,6 +40,23 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +// flush the contents of partWordBuffer to currentTextBlock +void ChapterHtmlSlimParser::flushPartWordBuffer() { + // determine font style + EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; + if (boldUntilDepth < depth && italicUntilDepth < depth) { + fontStyle = EpdFontFamily::BOLD_ITALIC; + } else if (boldUntilDepth < depth) { + fontStyle = EpdFontFamily::BOLD; + } else if (italicUntilDepth < depth) { + fontStyle = EpdFontFamily::ITALIC; + } + // flush the buffer + partWordBuffer[partWordBufferIndex] = '\0'; + currentTextBlock->addWord(partWordBuffer, fontStyle); + partWordBufferIndex = 0; +} + // start a new text block if needed void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { if (currentTextBlock) { @@ -125,6 +142,10 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { + if (self->partWordBufferIndex > 0) { + // flush word preceding
to currentTextBlock before calling startNewTextBlock + self->flushPartWordBuffer(); + } self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); @@ -149,22 +170,11 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char return; } - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::ITALIC; - } - for (int i = 0; i < len; i++) { if (isWhitespace(s[i])) { // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; + self->flushPartWordBuffer(); } // Skip the whitespace char continue; @@ -186,9 +196,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; + self->flushPartWordBuffer(); } self->partWordBuffer[self->partWordBufferIndex++] = s[i]; @@ -219,18 +227,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; if (shouldBreakText) { - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::ITALIC; - } - - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; + self->flushPartWordBuffer(); } } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 5355211a..2d8ebe5c 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -39,6 +39,7 @@ class ChapterHtmlSlimParser { bool hyphenationEnabled; void startNewTextBlock(TextBlock::Style style); + void flushPartWordBuffer(); void makePages(); // XML callbacks static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);