mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-05 15:17:37 +03:00
fix some styling edge cases: preserving spacing from parent elements, removing stray spaces after italicized text in child elements
This commit is contained in:
parent
6796989247
commit
a6d6e5e770
@ -19,6 +19,23 @@ namespace {
|
|||||||
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
|
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
|
||||||
constexpr size_t SOFT_HYPHEN_BYTES = 2;
|
constexpr size_t SOFT_HYPHEN_BYTES = 2;
|
||||||
|
|
||||||
|
// Check if a character is punctuation that should attach to the previous word
|
||||||
|
// (no space before it). Limited to sentence-ending and clause-separating punctuation
|
||||||
|
// to avoid false positives with decorative brackets like "[ 1 ]".
|
||||||
|
bool isAttachingPunctuation(const char c) {
|
||||||
|
return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if a word consists entirely of punctuation that should attach to the previous word
|
||||||
|
bool isAttachingPunctuationWord(const std::string& word) {
|
||||||
|
if (word.empty()) return false;
|
||||||
|
// Check if word starts with attaching punctuation and is short (to avoid false positives)
|
||||||
|
if (isAttachingPunctuation(word[0]) && word.size() <= 3) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
|
bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
|
||||||
|
|
||||||
// Removes every soft hyphen in-place so rendered glyphs match measured widths.
|
// Removes every soft hyphen in-place so rendered glyphs match measured widths.
|
||||||
@ -369,10 +386,20 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
|||||||
? blockStyle.textIndent
|
? blockStyle.textIndent
|
||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
// Calculate total word width for this line
|
// Calculate total word width for this line and count actual word gaps
|
||||||
|
// (punctuation that attaches to previous word doesn't count as a gap)
|
||||||
|
// Note: words list starts at the beginning because previous lines were spliced out
|
||||||
int lineWordWidthSum = 0;
|
int lineWordWidthSum = 0;
|
||||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
size_t actualGapCount = 0;
|
||||||
lineWordWidthSum += wordWidths[i];
|
auto countWordIt = words.begin();
|
||||||
|
|
||||||
|
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
|
||||||
|
lineWordWidthSum += wordWidths[lastBreakAt + wordIdx];
|
||||||
|
// Count gaps: each word after the first creates a gap, unless it's attaching punctuation
|
||||||
|
if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) {
|
||||||
|
actualGapCount++;
|
||||||
|
}
|
||||||
|
++countWordIt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate spacing (account for indent reducing effective page width on first line)
|
// Calculate spacing (account for indent reducing effective page width on first line)
|
||||||
@ -382,24 +409,38 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
|||||||
int spacing = spaceWidth;
|
int spacing = spaceWidth;
|
||||||
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
|
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
|
||||||
|
|
||||||
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
|
// For justified text, calculate spacing based on actual gap count
|
||||||
spacing = spareSpace / (lineWordCount - 1);
|
if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) {
|
||||||
|
spacing = spareSpace / static_cast<int>(actualGapCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate initial x position (first line starts at indent for left/justified text)
|
// Calculate initial x position (first line starts at indent for left/justified text)
|
||||||
auto xpos = static_cast<uint16_t>(firstLineIndent);
|
auto xpos = static_cast<uint16_t>(firstLineIndent);
|
||||||
if (style == TextBlock::RIGHT_ALIGN) {
|
if (style == TextBlock::RIGHT_ALIGN) {
|
||||||
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
|
xpos = spareSpace - static_cast<int>(actualGapCount) * spaceWidth;
|
||||||
} else if (style == TextBlock::CENTER_ALIGN) {
|
} else if (style == TextBlock::CENTER_ALIGN) {
|
||||||
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
|
xpos = (spareSpace - static_cast<int>(actualGapCount) * spaceWidth) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pre-calculate X positions for words
|
// Pre-calculate X positions for words
|
||||||
|
// Punctuation that attaches to the previous word doesn't get space before it
|
||||||
|
// Note: words list starts at the beginning because previous lines were spliced out
|
||||||
std::list<uint16_t> lineXPos;
|
std::list<uint16_t> lineXPos;
|
||||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
auto wordIt = words.begin();
|
||||||
const uint16_t currentWordWidth = wordWidths[i];
|
|
||||||
|
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
|
||||||
|
const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
|
||||||
|
|
||||||
lineXPos.push_back(xpos);
|
lineXPos.push_back(xpos);
|
||||||
xpos += currentWordWidth + spacing;
|
|
||||||
|
// Add spacing after this word, unless the next word is attaching punctuation
|
||||||
|
auto nextWordIt = wordIt;
|
||||||
|
++nextWordIt;
|
||||||
|
const bool nextIsAttachingPunctuation =
|
||||||
|
wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt);
|
||||||
|
|
||||||
|
xpos += currentWordWidth + (nextIsAttachingPunctuation ? 0 : spacing);
|
||||||
|
++wordIt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterators always start at the beginning as we are moving content with splice below
|
// Iterators always start at the beginning as we are moving content with splice below
|
||||||
|
|||||||
@ -113,13 +113,44 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() {
|
|||||||
partWordBufferIndex = 0;
|
partWordBufferIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Merge block styles for nested block elements
|
||||||
|
// When a child block element is inside a parent with no direct text content,
|
||||||
|
// we accumulate their margins so nested containers properly contribute spacing
|
||||||
|
BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) {
|
||||||
|
BlockStyle merged;
|
||||||
|
// Vertical margins: sum them (nested blocks create additive spacing)
|
||||||
|
merged.marginTop = static_cast<int16_t>(parent.marginTop + child.marginTop);
|
||||||
|
merged.marginBottom = static_cast<int16_t>(parent.marginBottom + child.marginBottom);
|
||||||
|
// Horizontal margins: sum them (nested blocks create cumulative indentation)
|
||||||
|
merged.marginLeft = static_cast<int16_t>(parent.marginLeft + child.marginLeft);
|
||||||
|
merged.marginRight = static_cast<int16_t>(parent.marginRight + child.marginRight);
|
||||||
|
// Padding: sum them
|
||||||
|
merged.paddingTop = static_cast<int16_t>(parent.paddingTop + child.paddingTop);
|
||||||
|
merged.paddingBottom = static_cast<int16_t>(parent.paddingBottom + child.paddingBottom);
|
||||||
|
merged.paddingLeft = static_cast<int16_t>(parent.paddingLeft + child.paddingLeft);
|
||||||
|
merged.paddingRight = static_cast<int16_t>(parent.paddingRight + child.paddingRight);
|
||||||
|
// Text indent: use child's if defined, otherwise inherit parent's
|
||||||
|
if (child.textIndentDefined) {
|
||||||
|
merged.textIndent = child.textIndent;
|
||||||
|
merged.textIndentDefined = true;
|
||||||
|
} else if (parent.textIndentDefined) {
|
||||||
|
merged.textIndent = parent.textIndent;
|
||||||
|
merged.textIndentDefined = true;
|
||||||
|
}
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
// start a new text block if needed
|
// start a new text block if needed
|
||||||
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
|
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
|
||||||
if (currentTextBlock) {
|
if (currentTextBlock) {
|
||||||
// already have a text block running and it is empty - just reuse it
|
// already have a text block running and it is empty - just reuse it
|
||||||
if (currentTextBlock->isEmpty()) {
|
if (currentTextBlock->isEmpty()) {
|
||||||
currentTextBlock->setStyle(style);
|
currentTextBlock->setStyle(style);
|
||||||
currentTextBlock->setBlockStyle(blockStyle);
|
// Merge with existing block style to accumulate margins from parent block elements
|
||||||
|
// This handles cases like <div margin-bottom:2em><h1>text</h1></div> where the
|
||||||
|
// div's margin should be preserved even though it has no direct text content
|
||||||
|
const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle);
|
||||||
|
currentTextBlock->setBlockStyle(merged);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user