fix some styling edge cases: preserving spacing from parent elements, removing stray spaces after italicized text in child elements

This commit is contained in:
Jake Kenneally 2026-01-31 14:46:20 -05:00
parent 6796989247
commit a6d6e5e770
2 changed files with 83 additions and 11 deletions

View File

@ -19,6 +19,23 @@ namespace {
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
constexpr size_t SOFT_HYPHEN_BYTES = 2;
// Check if a character is punctuation that should attach to the previous word
// (no space before it). Limited to sentence-ending and clause-separating punctuation
// to avoid false positives with decorative brackets like "[ 1 ]".
bool isAttachingPunctuation(const char c) {
return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':';
}
// Check if a word consists entirely of punctuation that should attach to the previous word
bool isAttachingPunctuationWord(const std::string& word) {
if (word.empty()) return false;
// Check if word starts with attaching punctuation and is short (to avoid false positives)
if (isAttachingPunctuation(word[0]) && word.size() <= 3) {
return true;
}
return false;
}
bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
// Removes every soft hyphen in-place so rendered glyphs match measured widths.
@ -369,10 +386,20 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
? blockStyle.textIndent
: 0;
// Calculate total word width for this line
// Calculate total word width for this line and count actual word gaps
// (punctuation that attaches to previous word doesn't count as a gap)
// Note: words list starts at the beginning because previous lines were spliced out
int lineWordWidthSum = 0;
for (size_t i = lastBreakAt; i < lineBreak; i++) {
lineWordWidthSum += wordWidths[i];
size_t actualGapCount = 0;
auto countWordIt = words.begin();
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
lineWordWidthSum += wordWidths[lastBreakAt + wordIdx];
// Count gaps: each word after the first creates a gap, unless it's attaching punctuation
if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) {
actualGapCount++;
}
++countWordIt;
}
// Calculate spacing (account for indent reducing effective page width on first line)
@ -382,24 +409,38 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
int spacing = spaceWidth;
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
spacing = spareSpace / (lineWordCount - 1);
// For justified text, calculate spacing based on actual gap count
if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) {
spacing = spareSpace / static_cast<int>(actualGapCount);
}
// Calculate initial x position (first line starts at indent for left/justified text)
auto xpos = static_cast<uint16_t>(firstLineIndent);
if (style == TextBlock::RIGHT_ALIGN) {
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
xpos = spareSpace - static_cast<int>(actualGapCount) * spaceWidth;
} else if (style == TextBlock::CENTER_ALIGN) {
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
xpos = (spareSpace - static_cast<int>(actualGapCount) * spaceWidth) / 2;
}
// Pre-calculate X positions for words
// Punctuation that attaches to the previous word doesn't get space before it
// Note: words list starts at the beginning because previous lines were spliced out
std::list<uint16_t> lineXPos;
for (size_t i = lastBreakAt; i < lineBreak; i++) {
const uint16_t currentWordWidth = wordWidths[i];
auto wordIt = words.begin();
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
lineXPos.push_back(xpos);
xpos += currentWordWidth + spacing;
// Add spacing after this word, unless the next word is attaching punctuation
auto nextWordIt = wordIt;
++nextWordIt;
const bool nextIsAttachingPunctuation =
wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt);
xpos += currentWordWidth + (nextIsAttachingPunctuation ? 0 : spacing);
++wordIt;
}
// Iterators always start at the beginning as we are moving content with splice below

View File

@ -113,13 +113,44 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() {
partWordBufferIndex = 0;
}
// Merge block styles for nested block elements
// When a child block element is inside a parent with no direct text content,
// we accumulate their margins so nested containers properly contribute spacing
BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) {
BlockStyle merged;
// Vertical margins: sum them (nested blocks create additive spacing)
merged.marginTop = static_cast<int16_t>(parent.marginTop + child.marginTop);
merged.marginBottom = static_cast<int16_t>(parent.marginBottom + child.marginBottom);
// Horizontal margins: sum them (nested blocks create cumulative indentation)
merged.marginLeft = static_cast<int16_t>(parent.marginLeft + child.marginLeft);
merged.marginRight = static_cast<int16_t>(parent.marginRight + child.marginRight);
// Padding: sum them
merged.paddingTop = static_cast<int16_t>(parent.paddingTop + child.paddingTop);
merged.paddingBottom = static_cast<int16_t>(parent.paddingBottom + child.paddingBottom);
merged.paddingLeft = static_cast<int16_t>(parent.paddingLeft + child.paddingLeft);
merged.paddingRight = static_cast<int16_t>(parent.paddingRight + child.paddingRight);
// Text indent: use child's if defined, otherwise inherit parent's
if (child.textIndentDefined) {
merged.textIndent = child.textIndent;
merged.textIndentDefined = true;
} else if (parent.textIndentDefined) {
merged.textIndent = parent.textIndent;
merged.textIndentDefined = true;
}
return merged;
}
// start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) {
currentTextBlock->setStyle(style);
currentTextBlock->setBlockStyle(blockStyle);
// Merge with existing block style to accumulate margins from parent block elements
// This handles cases like <div margin-bottom:2em><h1>text</h1></div> where the
// div's margin should be preserved even though it has no direct text content
const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle);
currentTextBlock->setBlockStyle(merged);
return;
}