From ebbe846567a94da5b1f4572831582ec8de175107 Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Fri, 23 Jan 2026 15:35:34 -0800 Subject: [PATCH 1/7] Fix line break Flush the word preceding the line break tag to currentTextBlock before calling startNewTextBlock. --- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 1d7e2ab3..16af6245 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -125,6 +125,20 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { + // flush word preceding <br> to currentTextBlock before calling startNewTextBlock + EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; + if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + fontStyle = EpdFontFamily::BOLD_ITALIC; + } else if (self->boldUntilDepth < self->depth) { + fontStyle = EpdFontFamily::BOLD; + } else if (self->italicUntilDepth < self->depth) { + fontStyle = EpdFontFamily::ITALIC; + } + + self->partWordBuffer[self->partWordBufferIndex] = '\0'; + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->partWordBufferIndex = 0; + self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); From 9c9ae79acbe925b12e9967cdbc0ff7794bcdeeaa Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Fri, 23 Jan 2026 16:04:33 -0800 Subject: [PATCH 2/7] Fix formatting issue --- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 16af6245..1d7eba75 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -138,7 +138,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* self->partWordBuffer[self->partWordBufferIndex] = '\0'; self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); self->partWordBufferIndex = 0; - + self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); From e7b87dceb0bd97939bad9e67e790f065d3ce6835 Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Fri, 23 Jan 2026 16:18:36 -0800 Subject: [PATCH 3/7] Nitpick: correct XHTML tag format in comment --- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 1d7eba75..0ba165fe 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -125,7 +125,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { - // flush word preceding <br> to currentTextBlock before calling startNewTextBlock + // flush word preceding <br/> to currentTextBlock before calling startNewTextBlock EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { fontStyle = EpdFontFamily::BOLD_ITALIC; From ba65ee158d7dcd8c5f02f724648b3c40d8f91f19 Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Fri, 23 Jan 2026 18:26:52 -0800 Subject: [PATCH 4/7] Refactor flush code --- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 62 +++++++------------ lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 1 + 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 0ba165fe..79e6e6c7 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -40,6 +40,23 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +EpdFontFamily::Style getFontStyle(const int boldUntilDepth, const int italicUntilDepth, const int depth) { + if (boldUntilDepth < depth && italicUntilDepth < depth) { + return EpdFontFamily::BOLD_ITALIC; + } else if (boldUntilDepth < depth) { + return EpdFontFamily::BOLD; + } else if (italicUntilDepth < depth) { + return EpdFontFamily::ITALIC; + } + return EpdFontFamily::REGULAR; +} + +void ChapterHtmlSlimParser::flushPartWordBuffer(const EpdFontFamily::Style fontStyle) { + partWordBuffer[partWordBufferIndex] = '\0'; + currentTextBlock->addWord(partWordBuffer, fontStyle); + partWordBufferIndex = 0; +} + // start a new text block if needed void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { if (currentTextBlock) { @@ -126,19 +143,8 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { // flush word preceding <br/> to currentTextBlock before calling startNewTextBlock - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::ITALIC; - } - - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; - + EpdFontFamily::Style fontStyle = getFontStyle(self->boldUntilDepth, self->italicUntilDepth, self->depth); + self->flushPartWordBuffer(fontStyle); self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); @@ -163,22 +169,13 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char return; } - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::ITALIC; - } + EpdFontFamily::Style fontStyle = getFontStyle(self->boldUntilDepth, self->italicUntilDepth, self->depth); for (int i = 0; i < len; i++) { if (isWhitespace(s[i])) { // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; + self->flushPartWordBuffer(fontStyle); } // Skip the whitespace char continue; @@ -200,9 +197,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // If we're about to 
run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; + self->flushPartWordBuffer(fontStyle); } self->partWordBuffer[self->partWordBufferIndex++] = s[i]; @@ -233,18 +228,9 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; if (shouldBreakText) { - EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { - fontStyle = EpdFontFamily::ITALIC; - } + EpdFontFamily::Style fontStyle = getFontStyle(self->boldUntilDepth, self->italicUntilDepth, self->depth); - self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); - self->partWordBufferIndex = 0; + self->flushPartWordBuffer(fontStyle); } } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 5355211a..d0dc235f 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -39,6 +39,7 @@ class ChapterHtmlSlimParser { bool hyphenationEnabled; void startNewTextBlock(TextBlock::Style style); + void flushPartWordBuffer(EpdFontFamily::Style fontStyle); void makePages(); // XML callbacks static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); From 19d2bd066513a4eabb00c68378447a23f22ed1cb Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Fri, 23 Jan 2026 19:37:19 -0800 Subject: [PATCH 5/7] Consolidate 
and optimize --- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 31 ++++++++----------- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 2 +- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 79e6e6c7..da8ac382 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -40,18 +40,18 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } -EpdFontFamily::Style getFontStyle(const int boldUntilDepth, const int italicUntilDepth, const int depth) { - if (boldUntilDepth < depth && italicUntilDepth < depth) { - return EpdFontFamily::BOLD_ITALIC; - } else if (boldUntilDepth < depth) { - return EpdFontFamily::BOLD; +void ChapterHtmlSlimParser::flushPartWordBuffer() { + EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; + if (boldUntilDepth < depth) { + if (italicUntilDepth < depth) { + fontStyle = EpdFontFamily::BOLD_ITALIC; + } else { + fontStyle = EpdFontFamily::BOLD; + } } else if (italicUntilDepth < depth) { - return EpdFontFamily::ITALIC; + fontStyle = EpdFontFamily::ITALIC; } - return EpdFontFamily::REGULAR; -} -void ChapterHtmlSlimParser::flushPartWordBuffer(const EpdFontFamily::Style fontStyle) { partWordBuffer[partWordBufferIndex] = '\0'; currentTextBlock->addWord(partWordBuffer, fontStyle); partWordBufferIndex = 0; @@ -143,8 +143,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { // flush word preceding <br/> to currentTextBlock before calling startNewTextBlock - EpdFontFamily::Style fontStyle = getFontStyle(self->boldUntilDepth, self->italicUntilDepth, self->depth); - self->flushPartWordBuffer(fontStyle); + self->flushPartWordBuffer(); self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); @@ -169,13 +168,11 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char return; } - EpdFontFamily::Style fontStyle = getFontStyle(self->boldUntilDepth, self->italicUntilDepth, self->depth); - for (int i = 0; i < len; i++) { if (isWhitespace(s[i])) { // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { - self->flushPartWordBuffer(fontStyle); + self->flushPartWordBuffer(); } // Skip the whitespace char continue; @@ -197,7 +194,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { - self->flushPartWordBuffer(fontStyle); + self->flushPartWordBuffer(); } self->partWordBuffer[self->partWordBufferIndex++] = s[i]; @@ -228,9 +225,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; if (shouldBreakText) { - EpdFontFamily::Style fontStyle = getFontStyle(self->boldUntilDepth, self->italicUntilDepth, self->depth); - - self->flushPartWordBuffer(fontStyle); + self->flushPartWordBuffer(); } } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index d0dc235f..2d8ebe5c 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -39,7 +39,7 @@ class ChapterHtmlSlimParser { bool hyphenationEnabled; void 
startNewTextBlock(TextBlock::Style style); - void flushPartWordBuffer(EpdFontFamily::Style fontStyle); + void flushPartWordBuffer(); void makePages(); // XML callbacks static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); From 90064cb2ffb08a09aac21e4ecc8de9ca1e3312eb Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Fri, 23 Jan 2026 21:53:35 -0800 Subject: [PATCH 6/7] Add comments, improve readability --- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index da8ac382..602dcbf2 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -40,18 +40,18 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +// flush the contents of partWordBuffer to currentTextBlock void ChapterHtmlSlimParser::flushPartWordBuffer() { + // determine font style EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (boldUntilDepth < depth) { - if (italicUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else { - fontStyle = EpdFontFamily::BOLD; - } + if (boldUntilDepth < depth && italicUntilDepth < depth) { + fontStyle = EpdFontFamily::BOLD_ITALIC; + } else if (boldUntilDepth < depth) { + fontStyle = EpdFontFamily::BOLD; } else if (italicUntilDepth < depth) { fontStyle = EpdFontFamily::ITALIC; } - + // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; currentTextBlock->addWord(partWordBuffer, fontStyle); partWordBufferIndex = 0; From bd66db94bd116e102407d9b30595d5af075aecdd Mon Sep 17 00:00:00 2001 From: Vincent Politzer Date: Sat, 24 Jan 2026 06:03:16 -0800 Subject: [PATCH 7/7] Add check for preceding word --- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 602dcbf2..53359179 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -142,8 +142,10 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { - // flush word preceding <br/> to currentTextBlock before calling startNewTextBlock - self->flushPartWordBuffer(); + if (self->partWordBufferIndex > 0) { + // flush word preceding <br/> to currentTextBlock before calling startNewTextBlock + self->flushPartWordBuffer(); + } self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment);