From d564173949e44e687828b0d1fc2a858b5265e5cd Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:42:45 -0500 Subject: [PATCH] refactor: merge TextBlock::Style into BlockStyle; use bitflag underlines Major consolidation of styling infrastructure: - Remove TextBlock::Style enum (JUSTIFIED, LEFT_ALIGN, etc.) Alignment is now stored in BlockStyle.alignment using CssTextAlign - Remove wordUnderlines list from TextBlock and ParsedText Underline state is now encoded in EpdFontFamily::Style via UNDERLINE bitflag - Use BlockStyle::fromCssStyle() and getCombinedBlockStyle() in parser Removes duplicated createBlockStyleFromCss() and mergeBlockStyles() - Simplify text block rendering to check style bitflag for underlines - Revert spurious spaces handling (isAttachingPunctuation logic) The actualGapCount approach had issues; using standard word gaps This reduces code duplication and simplifies the style inheritance model. --- lib/Epub/Epub/ParsedText.cpp | 113 ++++------- lib/Epub/Epub/ParsedText.h | 15 +- lib/Epub/Epub/blocks/TextBlock.cpp | 66 ++---- lib/Epub/Epub/blocks/TextBlock.h | 24 +-- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 189 +++++------------- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 5 +- 6 files changed, 113 insertions(+), 299 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 63648f04..aca85581 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -19,23 +19,6 @@ namespace { constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD"; constexpr size_t SOFT_HYPHEN_BYTES = 2; -// Check if a character is punctuation that should attach to the previous word -// (no space before it). Includes sentence punctuation and closing quotes. -// Excludes brackets/parens to avoid false positives with decorative patterns like "[ 1 ]". -bool isAttachingPunctuation(const char c) { - return c == '.' || c == ',' || c == '!' || c == '?' 
|| c == ';' || c == ':' || c == '"' || c == '\''; -} - -// Check if a word consists entirely of punctuation that should attach to the previous word -bool isAttachingPunctuationWord(const std::string& word) { - if (word.empty()) return false; - // Check if word starts with attaching punctuation and is short (to avoid false positives) - if (isAttachingPunctuation(word[0]) && word.size() <= 3) { - return true; - } - return false; -} - bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; } // Removes every soft hyphen in-place so rendered glyphs match measured widths. @@ -66,12 +49,15 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s } // namespace -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style style, const bool underline) { if (word.empty()) return; words.push_back(std::move(word)); - wordStyles.push_back(fontStyle); - wordUnderlines.push_back(underline); + EpdFontFamily::Style combinedStyle = style; + if (underline) { + combinedStyle = static_cast(combinedStyle | EpdFontFamily::UNDERLINE); + } + wordStyles.push_back(combinedStyle); } // Consumes data to minimize memory usage @@ -112,8 +98,7 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere auto wordStylesIt = wordStyles.begin(); while (wordsIt != words.end()) { - uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt); - wordWidths.push_back(width); + wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt)); std::advance(wordsIt, 1); std::advance(wordStylesIt, 1); @@ -129,10 +114,11 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c } // Calculate first line indent (only for left/justified text without extra paragraph spacing) - const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing 
&& - (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) - ? blockStyle.textIndent - : 0; + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; // Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation. for (size_t i = 0; i < wordWidths.size(); ++i) { @@ -233,7 +219,7 @@ void ParsedText::applyParagraphIndent() { if (blockStyle.textIndentDefined) { // CSS text-indent is explicitly set (even if 0) - don't use fallback EmSpace // The actual indent positioning is handled in extractLine() - } else if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) { + } else if (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) { // No CSS text-indent defined - use EmSpace fallback for visual indent words.front().insert(0, "\xe2\x80\x83"); } @@ -244,10 +230,11 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r const int pageWidth, const int spaceWidth, std::vector& wordWidths) { // Calculate first line indent (only for left/justified text without extra paragraph spacing) - const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing && - (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) - ? blockStyle.textIndent - : 0; + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? 
blockStyle.textIndent + : 0; std::vector lineBreakIndices; size_t currentIndex = 0; @@ -381,25 +368,16 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const // Calculate first line indent (only for left/justified text without extra paragraph spacing) const bool isFirstLine = breakIndex == 0; - const int firstLineIndent = isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && - (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) - ? blockStyle.textIndent - : 0; + const int firstLineIndent = + isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; - // Calculate total word width for this line and count actual word gaps - // (punctuation that attaches to previous word doesn't count as a gap) - // Note: words list starts at the beginning because previous lines were spliced out + // Calculate total word width for this line int lineWordWidthSum = 0; - size_t actualGapCount = 0; - auto countWordIt = words.begin(); - - for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { - lineWordWidthSum += wordWidths[lastBreakAt + wordIdx]; - // Count gaps: each word after the first creates a gap, unless it's attaching punctuation - if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) { - actualGapCount++; - } - ++countWordIt; + for (size_t i = lastBreakAt; i < lineBreak; i++) { + lineWordWidthSum += wordWidths[i]; } // Calculate spacing (account for indent reducing effective page width on first line) @@ -409,54 +387,37 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spacing = spaceWidth; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - // For justified text, calculate spacing based on actual gap count - if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) { - spacing = spareSpace / 
static_cast(actualGapCount); + if (blockStyle.alignment == CssTextAlign::Justify && !isLastLine && lineWordCount >= 2) { + spacing = spareSpace / (lineWordCount - 1); } // Calculate initial x position (first line starts at indent for left/justified text) auto xpos = static_cast(firstLineIndent); - if (style == TextBlock::RIGHT_ALIGN) { - xpos = spareSpace - static_cast(actualGapCount) * spaceWidth; - } else if (style == TextBlock::CENTER_ALIGN) { - xpos = (spareSpace - static_cast(actualGapCount) * spaceWidth) / 2; + if (blockStyle.alignment == CssTextAlign::Right) { + xpos = spareSpace - (lineWordCount - 1) * spaceWidth; + } else if (blockStyle.alignment == CssTextAlign::Center) { + xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; } // Pre-calculate X positions for words - // Punctuation that attaches to the previous word doesn't get space before it - // Note: words list starts at the beginning because previous lines were spliced out std::list lineXPos; - auto wordIt = words.begin(); - - for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { - const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx]; - + for (size_t i = lastBreakAt; i < lineBreak; i++) { + const uint16_t currentWordWidth = wordWidths[i]; lineXPos.push_back(xpos); - - // Add spacing after this word, unless the next word is attaching punctuation - auto nextWordIt = wordIt; - ++nextWordIt; - const bool nextIsAttachingPunctuation = wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt); - - xpos += currentWordWidth + (nextIsAttachingPunctuation ? 
0 : spacing); - ++wordIt; + xpos += currentWordWidth + spacing; } // Iterators always start at the beginning as we are moving content with splice below auto wordEndIt = words.begin(); auto wordStyleEndIt = wordStyles.begin(); - auto wordUnderlineEndIt = wordUnderlines.begin(); std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); - std::advance(wordUnderlineEndIt, lineWordCount); // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); - std::list lineWordUnderlines; - lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt); for (auto& word : lineWords) { if (containsSoftHyphen(word)) { @@ -464,6 +425,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const } } - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style, - blockStyle, std::move(lineWordUnderlines))); + processLine( + std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle)); } diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index cc2596c5..a13d13b5 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -16,8 +16,6 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; - std::list wordUnderlines; // Track underline per word - TextBlock::Style style; BlockStyle blockStyle; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -35,19 +33,14 @@ class ParsedText { std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: - explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, - const bool hyphenationEnabled = false, const BlockStyle& blockStyle = BlockStyle()) - : 
style(style), - blockStyle(blockStyle), - extraParagraphSpacing(extraParagraphSpacing), - hyphenationEnabled(hyphenationEnabled) {} + explicit ParsedText(const bool extraParagraphSpacing, const bool hyphenationEnabled = false, + const BlockStyle& blockStyle = BlockStyle()) + : blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); - void setStyle(const TextBlock::Style style) { this->style = style; } void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } - TextBlock::Style getStyle() const { return style; } - const BlockStyle& getBlockStyle() const { return blockStyle; } + BlockStyle& getBlockStyle() { return blockStyle; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 724471b6..3ab25558 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -14,15 +14,14 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); - auto wordUnderlineIt = wordUnderlines.begin(); for (size_t i = 0; i < words.size(); i++) { const int wordX = *wordXposIt + x; - renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt); + const EpdFontFamily::Style currentStyle = *wordStylesIt; + renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle); - // Draw underline if word is underlined - if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) { + if ((currentStyle & EpdFontFamily::UNDERLINE) != 0) { const std::string& w = *wordIt; - const int fullWordWidth = 
renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt); + const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle); // y is the top of the text line; add ascender to reach baseline, then offset 2px below const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; @@ -34,7 +33,7 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int static_cast(w[2]) == 0x83) { const char* visiblePtr = w.c_str() + 3; const int prefixWidth = renderer.getTextAdvanceX(fontId, std::string("\xe2\x80\x83").c_str()); - const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt); + const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, currentStyle); startX = wordX + prefixWidth; underlineWidth = visibleWidth; } @@ -45,9 +44,6 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int std::advance(wordIt, 1); std::advance(wordStylesIt, 1); std::advance(wordXposIt, 1); - if (wordUnderlineIt != wordUnderlines.end()) { - std::advance(wordUnderlineIt, 1); - } } } @@ -64,29 +60,9 @@ bool TextBlock::serialize(FsFile& file) const { for (auto x : wordXpos) serialization::writePod(file, x); for (auto s : wordStyles) serialization::writePod(file, s); - // Underline flags (packed as bytes, 8 words per byte) - uint8_t underlineByte = 0; - int bitIndex = 0; - auto underlineIt = wordUnderlines.begin(); - for (size_t i = 0; i < words.size(); i++) { - if (underlineIt != wordUnderlines.end() && *underlineIt) { - underlineByte |= 1 << bitIndex; - } - bitIndex++; - if (bitIndex == 8 || i == words.size() - 1) { - serialization::writePod(file, underlineByte); - underlineByte = 0; - bitIndex = 0; - } - if (underlineIt != wordUnderlines.end()) { - ++underlineIt; - } - } - - // Block style (alignment) - serialization::writePod(file, style); - - // Block style (margins/padding/indent) + // Style (alignment + margins/padding/indent) + serialization::writePod(file, blockStyle.alignment); + 
serialization::writePod(file, blockStyle.textAlignDefined); serialization::writePod(file, blockStyle.marginTop); serialization::writePod(file, blockStyle.marginBottom); serialization::writePod(file, blockStyle.marginLeft); @@ -106,8 +82,6 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { std::list words; std::list wordXpos; std::list wordStyles; - std::list wordUnderlines; - Style style; BlockStyle blockStyle; // Word count @@ -127,23 +101,9 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& s : wordStyles) serialization::readPod(file, s); - // Underline flags (packed as bytes, 8 words per byte) - wordUnderlines.resize(wc, false); - auto underlineIt = wordUnderlines.begin(); - const int bytesNeeded = (wc + 7) / 8; - for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) { - uint8_t underlineByte; - serialization::readPod(file, underlineByte); - for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) { - *underlineIt = (underlineByte & 1 << bit) != 0; - ++underlineIt; - } - } - - // Block style (alignment) - serialization::readPod(file, style); - - // Block style (margins/padding/indent) + // Style (alignment + margins/padding/indent) + serialization::readPod(file, blockStyle.alignment); + serialization::readPod(file, blockStyle.textAlignDefined); serialization::readPod(file, blockStyle.marginTop); serialization::readPod(file, blockStyle.marginBottom); serialization::readPod(file, blockStyle.marginLeft); @@ -155,6 +115,6 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { serialization::readPod(file, blockStyle.textIndent); serialization::readPod(file, blockStyle.textIndentDefined); - return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style, - blockStyle, std::move(wordUnderlines))); + return std::unique_ptr( + new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), blockStyle)); } diff --git 
a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index e7993fe8..e233f77f 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -11,41 +11,21 @@ // Represents a line of text on a page class TextBlock final : public Block { - public: - enum Style : uint8_t { - JUSTIFIED = 0, - LEFT_ALIGN = 1, - CENTER_ALIGN = 2, - RIGHT_ALIGN = 3, - }; - private: std::list words; std::list wordXpos; std::list wordStyles; - std::list wordUnderlines; // Track underline per word - Style style; BlockStyle blockStyle; public: explicit TextBlock(std::list words, std::list word_xpos, - std::list word_styles, const Style style, - const BlockStyle& blockStyle = BlockStyle(), std::list word_underlines = std::list()) + std::list word_styles, const BlockStyle& blockStyle = BlockStyle()) : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), - wordUnderlines(std::move(word_underlines)), - style(style), - blockStyle(blockStyle) { - // Ensure underlines list matches words list size - while (this->wordUnderlines.size() < this->words.size()) { - this->wordUnderlines.push_back(false); - } - } + blockStyle(blockStyle) {} ~TextBlock() override = default; - void setStyle(const Style style) { this->style = style; } void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } - Style getStyle() const { return style; } const BlockStyle& getBlockStyle() const { return blockStyle; } bool isEmpty() override { return words.empty(); } void layout(GfxRenderer& renderer) override {}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 68737279..ab93d9cb 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -43,39 +43,17 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } -// Create a BlockStyle from CSS style properties, 
resolving CssLength values to pixels -// emSize is the current font line height, used for em/rem unit conversion -BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle, const float emSize) { - BlockStyle blockStyle; - // Resolve all CssLength values to pixels using the current font's em size - const int16_t marginTopPx = cssStyle.marginTop.toPixelsInt16(emSize); - const int16_t marginBottomPx = cssStyle.marginBottom.toPixelsInt16(emSize); - const int16_t paddingTopPx = cssStyle.paddingTop.toPixelsInt16(emSize); - const int16_t paddingBottomPx = cssStyle.paddingBottom.toPixelsInt16(emSize); - - // Vertical: combine margin and padding for top/bottom spacing - blockStyle.marginTop = static_cast(marginTopPx + paddingTopPx); - blockStyle.marginBottom = static_cast(marginBottomPx + paddingBottomPx); - blockStyle.paddingTop = paddingTopPx; - blockStyle.paddingBottom = paddingBottomPx; - // Horizontal: store margin and padding separately for layout calculations - blockStyle.marginLeft = cssStyle.marginLeft.toPixelsInt16(emSize); - blockStyle.marginRight = cssStyle.marginRight.toPixelsInt16(emSize); - blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); - blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); - // Text indent - blockStyle.textIndent = cssStyle.textIndent.toPixelsInt16(emSize); - blockStyle.textIndentDefined = cssStyle.defined.textIndent; - return blockStyle; +bool isHeaderOrBlock(const char* name) { + return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); } // Update effective bold/italic/underline based on block style and inline style stack void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { // Start with block-level styles - effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; - effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic; + effectiveBold = 
currentCssStyle.hasFontWeight() && currentCssStyle.fontWeight == CssFontWeight::Bold; + effectiveItalic = currentCssStyle.hasFontStyle() && currentCssStyle.fontStyle == CssFontStyle::Italic; effectiveUnderline = - currentBlockStyle.hasTextDecoration() && currentBlockStyle.textDecoration == CssTextDecoration::Underline; + currentCssStyle.hasTextDecoration() && currentCssStyle.textDecoration == CssTextDecoration::Underline; // Apply inline style stack in order for (const auto& entry : inlineStyleStack) { @@ -98,69 +76,41 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() { const bool isItalic = italicUntilDepth < depth || effectiveItalic; const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline; + // Combine style flags using bitwise OR EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (isBold && isItalic) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (isBold) { - fontStyle = EpdFontFamily::BOLD; - } else if (isItalic) { - fontStyle = EpdFontFamily::ITALIC; + if (isBold) { + fontStyle = static_cast(fontStyle | EpdFontFamily::BOLD); + } + if (isItalic) { + fontStyle = static_cast(fontStyle | EpdFontFamily::ITALIC); + } + if (isUnderline) { + fontStyle = static_cast(fontStyle | EpdFontFamily::UNDERLINE); } // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; - currentTextBlock->addWord(partWordBuffer, fontStyle, isUnderline); + currentTextBlock->addWord(partWordBuffer, fontStyle); partWordBufferIndex = 0; } -// Merge block styles for nested block elements -// When a child block element is inside a parent with no direct text content, -// we accumulate their margins so nested containers properly contribute spacing -BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) { - BlockStyle merged; - // Vertical margins: sum them (nested blocks create additive spacing) - merged.marginTop = static_cast(parent.marginTop + child.marginTop); - merged.marginBottom = static_cast(parent.marginBottom + 
child.marginBottom); - // Horizontal margins: sum them (nested blocks create cumulative indentation) - merged.marginLeft = static_cast(parent.marginLeft + child.marginLeft); - merged.marginRight = static_cast(parent.marginRight + child.marginRight); - // Padding: sum them - merged.paddingTop = static_cast(parent.paddingTop + child.paddingTop); - merged.paddingBottom = static_cast(parent.paddingBottom + child.paddingBottom); - merged.paddingLeft = static_cast(parent.paddingLeft + child.paddingLeft); - merged.paddingRight = static_cast(parent.paddingRight + child.paddingRight); - // Text indent: use child's if defined, otherwise inherit parent's - if (child.textIndentDefined) { - merged.textIndent = child.textIndent; - merged.textIndentDefined = true; - } else if (parent.textIndentDefined) { - merged.textIndent = parent.textIndent; - merged.textIndentDefined = true; - } - return merged; -} - // start a new text block if needed -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) { +void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { - currentTextBlock->setStyle(style); - // Merge with existing block style to accumulate margins from parent block elements - // This handles cases like

<div style="margin-bottom:2em"><h1>text</h1></div>

where the - // div's margin should be preserved even though it has no direct text content - const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle); - currentTextBlock->setBlockStyle(merged); + // Merge with existing block style to accumulate CSS styling from parent block elements. + // This handles cases like

<div style="margin-bottom:2em"><h1>text</h1></div>

where the + // div's margin should be preserved, even though it has no direct text content. + currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle)); return; } makePages(); } - currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled, blockStyle)); + currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle)); } -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); } - void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); @@ -183,13 +133,17 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } + auto centeredBlockStyle = BlockStyle(); + centeredBlockStyle.textAlignDefined = true; + centeredBlockStyle.alignment = CssTextAlign::Center; + // Special handling for tables - show placeholder text instead of dropping silently if (strcmp(name, "table") == 0) { // Add placeholder text - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, "[Table omitted]", strlen("[Table omitted]")); @@ -214,9 +168,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before 
processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, alt.c_str(), alt.length()); @@ -244,9 +198,6 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } - // Determine if this is a block element - bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); - // Compute CSS style for this element CssStyle cssStyle; if (self->cssParser) { @@ -259,30 +210,12 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } - if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - // Headers: center aligned, bold, apply CSS overrides - TextBlock::Style alignment = TextBlock::CENTER_ALIGN; - if (cssStyle.hasTextAlign()) { - switch (cssStyle.textAlign) { - case CssTextAlign::Left: - alignment = TextBlock::LEFT_ALIGN; - break; - case CssTextAlign::Right: - alignment = TextBlock::RIGHT_ALIGN; - break; - case CssTextAlign::Center: - alignment = TextBlock::CENTER_ALIGN; - break; - case CssTextAlign::Justify: - alignment = TextBlock::JUSTIFIED; - break; - default: - break; - } - } + const float emSize = static_cast(self->renderer.getLineHeight(self->fontId)) * self->lineCompression; + const auto userAlignment = static_cast(self->paragraphAlignment); - self->currentBlockStyle = cssStyle; - self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId))); + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); self->updateEffectiveInlineStyle(); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { @@ -291,31 +224,10 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* // flush word preceding
to currentTextBlock before calling startNewTextBlock self->flushPartWordBuffer(); } - self->startNewTextBlock(self->currentTextBlock->getStyle()); + self->startNewTextBlock(self->currentTextBlock->getBlockStyle()); } else { - // Determine alignment from CSS or default - auto alignment = static_cast(self->paragraphAlignment); - if (cssStyle.hasTextAlign()) { - switch (cssStyle.textAlign) { - case CssTextAlign::Left: - alignment = TextBlock::LEFT_ALIGN; - break; - case CssTextAlign::Right: - alignment = TextBlock::RIGHT_ALIGN; - break; - case CssTextAlign::Center: - alignment = TextBlock::CENTER_ALIGN; - break; - case CssTextAlign::Justify: - alignment = TextBlock::JUSTIFIED; - break; - default: - break; - } - } - - self->currentBlockStyle = cssStyle; - self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId))); + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); self->updateEffectiveInlineStyle(); if (strcmp(name, "li") == 0) { @@ -373,7 +285,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } self->inlineStyleStack.push_back(entry); self->updateEffectiveInlineStyle(); - } else if (strcmp(name, "span") == 0 || !isBlockElement) { + } else if (strcmp(name, "span") == 0 || !isHeaderOrBlock(name)) { // Handle span and other inline elements for CSS styling if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) { StyleStackEntry entry; @@ -464,12 +376,12 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; + const bool headerOrBlockTag = isHeaderOrBlock(name); // Flush buffer with current style BEFORE any style changes if (self->partWordBufferIndex > 0) { // Flush if 
style will change OR if we're closing a block/structural element - const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || - matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || + const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; @@ -508,15 +420,18 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n self->updateEffectiveInlineStyle(); } - // Clear block style when leaving block elements - if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - self->currentBlockStyle.reset(); + // Clear block style when leaving header or block elements + if (headerOrBlockTag) { + self->currentCssStyle.reset(); self->updateEffectiveInlineStyle(); } } bool ChapterHtmlSlimParser::parseAndBuildPages() { - startNewTextBlock((TextBlock::Style)this->paragraphAlignment); + auto paragraphAlignmentBlockStyle = BlockStyle(); + paragraphAlignmentBlockStyle.textAlignDefined = true; + paragraphAlignmentBlockStyle.alignment = static_cast(this->paragraphAlignment); + startNewTextBlock(paragraphAlignmentBlockStyle); const XML_Parser parser = XML_ParserCreate(nullptr); int done; @@ -624,11 +539,14 @@ void ChapterHtmlSlimParser::makePages() { const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; - // Apply marginTop before the paragraph (stored in pixels) + // Apply top spacing before the paragraph (stored in pixels) const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); if (blockStyle.marginTop > 0) { currentPageNextY += blockStyle.marginTop; } + if (blockStyle.paddingTop > 0) { + currentPageNextY += blockStyle.paddingTop; + } // Calculate effective width accounting for horizontal 
margins/padding const int horizontalInset = blockStyle.totalHorizontalInset(); @@ -639,10 +557,13 @@ void ChapterHtmlSlimParser::makePages() { renderer, fontId, effectiveWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Apply marginBottom after the paragraph (stored in pixels) + // Apply bottom spacing after the paragraph (stored in pixels) if (blockStyle.marginBottom > 0) { currentPageNextY += blockStyle.marginBottom; } + if (blockStyle.paddingBottom > 0) { + currentPageNextY += blockStyle.paddingBottom; + } // Extra paragraph spacing if enabled (default behavior) if (extraParagraphSpacing) { diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 7d9803f5..92a9838a 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -50,14 +50,13 @@ class ChapterHtmlSlimParser { bool hasUnderline = false, underline = false; }; std::vector inlineStyleStack; - CssStyle currentBlockStyle; + CssStyle currentCssStyle; bool effectiveBold = false; bool effectiveItalic = false; bool effectiveUnderline = false; void updateEffectiveInlineStyle(); - void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle); - void startNewTextBlock(TextBlock::Style style); + void startNewTextBlock(const BlockStyle& blockStyle); void flushPartWordBuffer(); void makePages(); // XML callbacks