fix some styling edge cases: preserving spacing from parent elements, removing stray spaces after italicized text in child elements

2026-02-05 15:17:37 +03:00 · 2026-01-31 14:46:20 -05:00 · 2026-01-31 14:46:20 -05:00 · a6d6e5e770
commit a6d6e5e770
parent 6796989247
2 changed files with 83 additions and 11 deletions
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@ -19,6 +19,23 @@ namespace {
 constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
 constexpr size_t SOFT_HYPHEN_BYTES = 2;
 // Check if a character is punctuation that should attach to the previous word
 // (no space before it). Limited to sentence-ending and clause-separating punctuation
 // to avoid false positives with decorative brackets like "[ 1 ]".
 bool isAttachingPunctuation(const char c) {
  return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':';
 }
 // Check if a word consists entirely of punctuation that should attach to the previous word
 bool isAttachingPunctuationWord(const std::string& word) {
  if (word.empty()) return false;
  // Check if word starts with attaching punctuation and is short (to avoid false positives)
  if (isAttachingPunctuation(word[0]) && word.size() <= 3) {
    return true;
  }
  return false;
 }
 bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
 // Removes every soft hyphen in-place so rendered glyphs match measured widths.
@ -369,10 +386,20 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
                                  ? blockStyle.textIndent
                                  : 0;
-  // Calculate total word width for this line
+  // Calculate total word width for this line and count actual word gaps
  // (punctuation that attaches to previous word doesn't count as a gap)
  // Note: words list starts at the beginning because previous lines were spliced out
  int lineWordWidthSum = 0;
-  for (size_t i = lastBreakAt; i < lineBreak; i++) {
+  size_t actualGapCount = 0;
-    lineWordWidthSum += wordWidths[i];
+  auto countWordIt = words.begin();
  for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
    lineWordWidthSum += wordWidths[lastBreakAt + wordIdx];
    // Count gaps: each word after the first creates a gap, unless it's attaching punctuation
    if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) {
      actualGapCount++;
    }
    ++countWordIt;
  }
  // Calculate spacing (account for indent reducing effective page width on first line)
@ -382,24 +409,38 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
  int spacing = spaceWidth;
  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
-  if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
+  // For justified text, calculate spacing based on actual gap count
-    spacing = spareSpace / (lineWordCount - 1);
+  if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) {
    spacing = spareSpace / static_cast<int>(actualGapCount);
  }
  // Calculate initial x position (first line starts at indent for left/justified text)
  auto xpos = static_cast<uint16_t>(firstLineIndent);
  if (style == TextBlock::RIGHT_ALIGN) {
-    xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
+    xpos = spareSpace - static_cast<int>(actualGapCount) * spaceWidth;
  } else if (style == TextBlock::CENTER_ALIGN) {
-    xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
+    xpos = (spareSpace - static_cast<int>(actualGapCount) * spaceWidth) / 2;
  }
  // Pre-calculate X positions for words
  // Punctuation that attaches to the previous word doesn't get space before it
  // Note: words list starts at the beginning because previous lines were spliced out
  std::list<uint16_t> lineXPos;
-  for (size_t i = lastBreakAt; i < lineBreak; i++) {
+  auto wordIt = words.begin();
-    const uint16_t currentWordWidth = wordWidths[i];
+
  for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
    const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
    lineXPos.push_back(xpos);
-    xpos += currentWordWidth + spacing;
+
    // Add spacing after this word, unless the next word is attaching punctuation
    auto nextWordIt = wordIt;
    ++nextWordIt;
    const bool nextIsAttachingPunctuation =
        wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt);
    xpos += currentWordWidth + (nextIsAttachingPunctuation ? 0 : spacing);
    ++wordIt;
  }
  // Iterators always start at the beginning as we are moving content with splice below
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@ -113,13 +113,44 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() {
  partWordBufferIndex = 0;
 }
 // Merge block styles for nested block elements
 // When a child block element is inside a parent with no direct text content,
 // we accumulate their margins so nested containers properly contribute spacing
 BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) {
  BlockStyle merged;
  // Vertical margins: sum them (nested blocks create additive spacing)
  merged.marginTop = static_cast<int16_t>(parent.marginTop + child.marginTop);
  merged.marginBottom = static_cast<int16_t>(parent.marginBottom + child.marginBottom);
  // Horizontal margins: sum them (nested blocks create cumulative indentation)
  merged.marginLeft = static_cast<int16_t>(parent.marginLeft + child.marginLeft);
  merged.marginRight = static_cast<int16_t>(parent.marginRight + child.marginRight);
  // Padding: sum them
  merged.paddingTop = static_cast<int16_t>(parent.paddingTop + child.paddingTop);
  merged.paddingBottom = static_cast<int16_t>(parent.paddingBottom + child.paddingBottom);
  merged.paddingLeft = static_cast<int16_t>(parent.paddingLeft + child.paddingLeft);
  merged.paddingRight = static_cast<int16_t>(parent.paddingRight + child.paddingRight);
  // Text indent: use child's if defined, otherwise inherit parent's
  if (child.textIndentDefined) {
    merged.textIndent = child.textIndent;
    merged.textIndentDefined = true;
  } else if (parent.textIndentDefined) {
    merged.textIndent = parent.textIndent;
    merged.textIndentDefined = true;
  }
  return merged;
 }
 // start a new text block if needed
 void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
  if (currentTextBlock) {
    // already have a text block running and it is empty - just reuse it
    if (currentTextBlock->isEmpty()) {
      currentTextBlock->setStyle(style);
-      currentTextBlock->setBlockStyle(blockStyle);
+      // Merge with existing block style to accumulate margins from parent block elements
      // This handles cases like <div margin-bottom:2em><h1>text</h1></div> where the
      // div's margin should be preserved even though it has no direct text content
      const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle);
      currentTextBlock->setBlockStyle(merged);
      return;
    }