mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-04 14:47:37 +03:00
fix some styling edge cases: preserving spacing from parent elements, removing stray spaces after italicized text in child elements
This commit is contained in:
parent
6796989247
commit
a6d6e5e770
@ -19,6 +19,23 @@ namespace {
|
||||
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
|
||||
constexpr size_t SOFT_HYPHEN_BYTES = 2;
|
||||
|
||||
// Check if a character is punctuation that should attach to the previous word
|
||||
// (no space before it). Limited to sentence-ending and clause-separating punctuation
|
||||
// to avoid false positives with decorative brackets like "[ 1 ]".
|
||||
bool isAttachingPunctuation(const char c) {
|
||||
return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':';
|
||||
}
|
||||
|
||||
// Check if a word consists entirely of punctuation that should attach to the previous word
|
||||
bool isAttachingPunctuationWord(const std::string& word) {
|
||||
if (word.empty()) return false;
|
||||
// Check if word starts with attaching punctuation and is short (to avoid false positives)
|
||||
if (isAttachingPunctuation(word[0]) && word.size() <= 3) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
|
||||
|
||||
// Removes every soft hyphen in-place so rendered glyphs match measured widths.
|
||||
@ -369,10 +386,20 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
||||
? blockStyle.textIndent
|
||||
: 0;
|
||||
|
||||
// Calculate total word width for this line
|
||||
// Calculate total word width for this line and count actual word gaps
|
||||
// (punctuation that attaches to previous word doesn't count as a gap)
|
||||
// Note: words list starts at the beginning because previous lines were spliced out
|
||||
int lineWordWidthSum = 0;
|
||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
||||
lineWordWidthSum += wordWidths[i];
|
||||
size_t actualGapCount = 0;
|
||||
auto countWordIt = words.begin();
|
||||
|
||||
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
|
||||
lineWordWidthSum += wordWidths[lastBreakAt + wordIdx];
|
||||
// Count gaps: each word after the first creates a gap, unless it's attaching punctuation
|
||||
if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) {
|
||||
actualGapCount++;
|
||||
}
|
||||
++countWordIt;
|
||||
}
|
||||
|
||||
// Calculate spacing (account for indent reducing effective page width on first line)
|
||||
@ -382,24 +409,38 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
||||
int spacing = spaceWidth;
|
||||
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
|
||||
|
||||
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
|
||||
spacing = spareSpace / (lineWordCount - 1);
|
||||
// For justified text, calculate spacing based on actual gap count
|
||||
if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) {
|
||||
spacing = spareSpace / static_cast<int>(actualGapCount);
|
||||
}
|
||||
|
||||
// Calculate initial x position (first line starts at indent for left/justified text)
|
||||
auto xpos = static_cast<uint16_t>(firstLineIndent);
|
||||
if (style == TextBlock::RIGHT_ALIGN) {
|
||||
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
|
||||
xpos = spareSpace - static_cast<int>(actualGapCount) * spaceWidth;
|
||||
} else if (style == TextBlock::CENTER_ALIGN) {
|
||||
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
|
||||
xpos = (spareSpace - static_cast<int>(actualGapCount) * spaceWidth) / 2;
|
||||
}
|
||||
|
||||
// Pre-calculate X positions for words
|
||||
// Punctuation that attaches to the previous word doesn't get space before it
|
||||
// Note: words list starts at the beginning because previous lines were spliced out
|
||||
std::list<uint16_t> lineXPos;
|
||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
||||
const uint16_t currentWordWidth = wordWidths[i];
|
||||
auto wordIt = words.begin();
|
||||
|
||||
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
|
||||
const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
|
||||
|
||||
lineXPos.push_back(xpos);
|
||||
xpos += currentWordWidth + spacing;
|
||||
|
||||
// Add spacing after this word, unless the next word is attaching punctuation
|
||||
auto nextWordIt = wordIt;
|
||||
++nextWordIt;
|
||||
const bool nextIsAttachingPunctuation =
|
||||
wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt);
|
||||
|
||||
xpos += currentWordWidth + (nextIsAttachingPunctuation ? 0 : spacing);
|
||||
++wordIt;
|
||||
}
|
||||
|
||||
// Iterators always start at the beginning as we are moving content with splice below
|
||||
|
||||
@ -113,13 +113,44 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() {
|
||||
partWordBufferIndex = 0;
|
||||
}
|
||||
|
||||
// Merge block styles for nested block elements
|
||||
// When a child block element is inside a parent with no direct text content,
|
||||
// we accumulate their margins so nested containers properly contribute spacing
|
||||
BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) {
|
||||
BlockStyle merged;
|
||||
// Vertical margins: sum them (nested blocks create additive spacing)
|
||||
merged.marginTop = static_cast<int16_t>(parent.marginTop + child.marginTop);
|
||||
merged.marginBottom = static_cast<int16_t>(parent.marginBottom + child.marginBottom);
|
||||
// Horizontal margins: sum them (nested blocks create cumulative indentation)
|
||||
merged.marginLeft = static_cast<int16_t>(parent.marginLeft + child.marginLeft);
|
||||
merged.marginRight = static_cast<int16_t>(parent.marginRight + child.marginRight);
|
||||
// Padding: sum them
|
||||
merged.paddingTop = static_cast<int16_t>(parent.paddingTop + child.paddingTop);
|
||||
merged.paddingBottom = static_cast<int16_t>(parent.paddingBottom + child.paddingBottom);
|
||||
merged.paddingLeft = static_cast<int16_t>(parent.paddingLeft + child.paddingLeft);
|
||||
merged.paddingRight = static_cast<int16_t>(parent.paddingRight + child.paddingRight);
|
||||
// Text indent: use child's if defined, otherwise inherit parent's
|
||||
if (child.textIndentDefined) {
|
||||
merged.textIndent = child.textIndent;
|
||||
merged.textIndentDefined = true;
|
||||
} else if (parent.textIndentDefined) {
|
||||
merged.textIndent = parent.textIndent;
|
||||
merged.textIndentDefined = true;
|
||||
}
|
||||
return merged;
|
||||
}
|
||||
|
||||
// start a new text block if needed
|
||||
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
|
||||
if (currentTextBlock) {
|
||||
// already have a text block running and it is empty - just reuse it
|
||||
if (currentTextBlock->isEmpty()) {
|
||||
currentTextBlock->setStyle(style);
|
||||
currentTextBlock->setBlockStyle(blockStyle);
|
||||
// Merge with existing block style to accumulate margins from parent block elements
|
||||
// This handles cases like <div margin-bottom:2em><h1>text</h1></div> where the
|
||||
// div's margin should be preserved even though it has no direct text content
|
||||
const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle);
|
||||
currentTextBlock->setBlockStyle(merged);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user