mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-07 08:07:40 +03:00
use language hyphenator for overfit words
This commit is contained in:
parent
04a084f6c8
commit
10fa0cc060
@ -75,7 +75,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
|
|||||||
// Use greedy layout that can split words mid-loop when a hyphenated prefix fits.
|
// Use greedy layout that can split words mid-loop when a hyphenated prefix fits.
|
||||||
lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
|
lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
|
||||||
} else {
|
} else {
|
||||||
lineBreakIndices = computeLineBreaks(pageWidth, spaceWidth, wordWidths);
|
lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
|
||||||
}
|
}
|
||||||
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
|
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
|
||||||
|
|
||||||
@ -103,8 +103,25 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
|
|||||||
return wordWidths;
|
return wordWidths;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int spaceWidth,
|
std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
|
||||||
const std::vector<uint16_t>& wordWidths) const {
|
const int spaceWidth, std::vector<uint16_t>& wordWidths) {
|
||||||
|
if (words.empty()) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure any word that would overflow even as the first entry on a line is split, trying language-aware hyphenation first and fallback breaks only when no dictionary break fits.
|
||||||
|
for (size_t i = 0; i < wordWidths.size(); ++i) {
|
||||||
|
while (wordWidths[i] > pageWidth) {
|
||||||
|
// Try language-aware hyphenation first; only fall back to heuristics when no dictionary break fits.
|
||||||
|
if (hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/false)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const size_t totalWordCount = words.size();
|
const size_t totalWordCount = words.size();
|
||||||
|
|
||||||
// DP table to store the minimum badness (cost) of lines starting at index i
|
// DP table to store the minimum badness (cost) of lines starting at index i
|
||||||
@ -260,7 +277,10 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
|||||||
const auto style = *styleIt;
|
const auto style = *styleIt;
|
||||||
|
|
||||||
// Collect candidate breakpoints (byte offsets and hyphen requirements).
|
// Collect candidate breakpoints (byte offsets and hyphen requirements).
|
||||||
const auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
|
auto breakInfos = Hyphenator::breakOffsets(word, /*allowFallback=*/false);
|
||||||
|
if (breakInfos.empty() && allowFallbackBreaks) {
|
||||||
|
breakInfos = Hyphenator::breakOffsets(word, /*allowFallback=*/true);
|
||||||
|
}
|
||||||
if (breakInfos.empty()) {
|
if (breakInfos.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,7 +20,8 @@ class ParsedText {
|
|||||||
bool hyphenationEnabled;
|
bool hyphenationEnabled;
|
||||||
|
|
||||||
void applyParagraphIndent();
|
void applyParagraphIndent();
|
||||||
std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths) const;
|
std::vector<size_t> computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth, int spaceWidth,
|
||||||
|
std::vector<uint16_t>& wordWidths);
|
||||||
std::vector<size_t> computeHyphenatedLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth,
|
std::vector<size_t> computeHyphenatedLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth,
|
||||||
int spaceWidth, std::vector<uint16_t>& wordWidths);
|
int spaceWidth, std::vector<uint16_t>& wordWidths);
|
||||||
bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId,
|
bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user