mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-05 15:17:37 +03:00
Refactor computeLineBreaks to simplify logic and improve hyphenation handling
This commit is contained in:
parent
a1f82308e4
commit
a3dc96a3b8
@ -19,11 +19,6 @@ struct HyphenSplitDecision {
|
|||||||
uint16_t prefixWidth;
|
uint16_t prefixWidth;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HyphenationGuard {
|
|
||||||
size_t prefixIndex;
|
|
||||||
size_t tailIndex;
|
|
||||||
};
|
|
||||||
|
|
||||||
bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
|
bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
|
||||||
const EpdFontStyle style, const int availableWidth, const bool includeFallback,
|
const EpdFontStyle style, const int availableWidth, const bool includeFallback,
|
||||||
HyphenSplitDecision* decision) {
|
HyphenSplitDecision* decision) {
|
||||||
@ -147,33 +142,31 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
|
|||||||
|
|
||||||
std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
|
std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
|
||||||
const int spaceWidth, std::vector<uint16_t>& wordWidths) {
|
const int spaceWidth, std::vector<uint16_t>& wordWidths) {
|
||||||
|
std::vector<size_t> lineBreakIndices;
|
||||||
if (words.empty()) {
|
if (words.empty()) {
|
||||||
return {};
|
return lineBreakIndices;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<HyphenationGuard> guards;
|
if (wordWidths.empty()) {
|
||||||
|
return lineBreakIndices;
|
||||||
|
}
|
||||||
|
|
||||||
auto shiftGuardIndices = [&](size_t insertPos) {
|
constexpr size_t MAX_LINES = 1000;
|
||||||
for (auto& guard : guards) {
|
const auto appendLineBreak = [&](size_t index) {
|
||||||
if (guard.prefixIndex >= insertPos) {
|
const size_t clampedIndex = std::min(index, wordWidths.size());
|
||||||
guard.prefixIndex++;
|
lineBreakIndices.push_back(clampedIndex);
|
||||||
}
|
return lineBreakIndices.size() < MAX_LINES;
|
||||||
if (guard.tailIndex >= insertPos) {
|
|
||||||
guard.tailIndex++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
auto runDp = [&](std::vector<size_t>& lineBreaks) {
|
const auto runDp = [&]() {
|
||||||
|
lineBreakIndices.clear();
|
||||||
const size_t totalWordCount = wordWidths.size();
|
const size_t totalWordCount = wordWidths.size();
|
||||||
|
if (totalWordCount == 0) {
|
||||||
// DP table to store the minimum badness (cost) of lines starting at index i
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<int> dp(totalWordCount);
|
std::vector<int> dp(totalWordCount);
|
||||||
// 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
|
|
||||||
std::vector<size_t> ans(totalWordCount);
|
std::vector<size_t> ans(totalWordCount);
|
||||||
|
|
||||||
// Base Case
|
|
||||||
|
|
||||||
dp[totalWordCount - 1] = 0;
|
dp[totalWordCount - 1] = 0;
|
||||||
ans[totalWordCount - 1] = totalWordCount - 1;
|
ans[totalWordCount - 1] = totalWordCount - 1;
|
||||||
|
|
||||||
@ -182,129 +175,109 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
|
|||||||
dp[i] = MAX_COST;
|
dp[i] = MAX_COST;
|
||||||
|
|
||||||
for (size_t j = i; j < totalWordCount; ++j) {
|
for (size_t j = i; j < totalWordCount; ++j) {
|
||||||
// Current line length: previous width + space + current word width
|
|
||||||
currlen += wordWidths[j] + spaceWidth;
|
currlen += wordWidths[j] + spaceWidth;
|
||||||
|
|
||||||
if (currlen > pageWidth) {
|
if (currlen > pageWidth) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool violatesGuard = false;
|
|
||||||
for (const auto& guard : guards) {
|
|
||||||
if (i <= guard.prefixIndex && j >= guard.tailIndex) {
|
|
||||||
violatesGuard = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (violatesGuard) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
int cost;
|
int cost;
|
||||||
if (j == totalWordCount - 1) {
|
if (j == totalWordCount - 1) {
|
||||||
cost = 0;
|
cost = 0;
|
||||||
} else {
|
} else {
|
||||||
const int remainingSpace = pageWidth - currlen;
|
const int remainingSpace = pageWidth - currlen;
|
||||||
// Use long long for the square to prevent overflow
|
const long long costLl = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
|
||||||
const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
|
cost = costLl > MAX_COST ? MAX_COST : static_cast<int>(costLl);
|
||||||
cost = cost_ll > MAX_COST ? MAX_COST : static_cast<int>(cost_ll);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cost < dp[i]) {
|
if (cost < dp[i]) {
|
||||||
dp[i] = cost;
|
dp[i] = cost;
|
||||||
ans[i] = j; // j is the index of the last word in this optimal line
|
ans[i] = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lineBreaks.clear();
|
|
||||||
size_t currentWordIndex = 0;
|
size_t currentWordIndex = 0;
|
||||||
constexpr size_t MAX_LINES = 1000;
|
while (currentWordIndex < totalWordCount && lineBreakIndices.size() < MAX_LINES) {
|
||||||
|
|
||||||
while (currentWordIndex < totalWordCount && lineBreaks.size() < MAX_LINES) {
|
|
||||||
const size_t nextBreakIndex = ans[currentWordIndex] + 1;
|
const size_t nextBreakIndex = ans[currentWordIndex] + 1;
|
||||||
lineBreaks.push_back(nextBreakIndex);
|
if (!appendLineBreak(nextBreakIndex)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
currentWordIndex = nextBreakIndex;
|
currentWordIndex = nextBreakIndex;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Stores the index of the word that starts the next line (last_word_index + 1)
|
const auto runHyphenatedLayout = [&]() {
|
||||||
std::vector<size_t> lineBreakIndices;
|
size_t currentIndex = 0;
|
||||||
|
bool continueLayout = true;
|
||||||
|
|
||||||
while (true) {
|
while (currentIndex < wordWidths.size() && continueLayout) {
|
||||||
runDp(lineBreakIndices);
|
int lineWidth = 0;
|
||||||
|
size_t wordsOnLine = 0;
|
||||||
|
|
||||||
if (!hyphenationEnabled) {
|
while (currentIndex < wordWidths.size()) {
|
||||||
return lineBreakIndices;
|
const int interWordSpace = (wordsOnLine == 0) ? 0 : spaceWidth;
|
||||||
}
|
const int projectedWidth = lineWidth + interWordSpace + wordWidths[currentIndex];
|
||||||
|
|
||||||
bool insertedSplit = false;
|
if (projectedWidth <= pageWidth) {
|
||||||
size_t lastBreakAt = 0;
|
lineWidth = projectedWidth;
|
||||||
|
++currentIndex;
|
||||||
|
++wordsOnLine;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
for (size_t lineIdx = 0; lineIdx < lineBreakIndices.size(); ++lineIdx) {
|
auto wordNodeIt = words.begin();
|
||||||
const size_t lineBreak = lineBreakIndices[lineIdx];
|
auto styleNodeIt = wordStyles.begin();
|
||||||
const bool isLastLine = lineIdx == lineBreakIndices.size() - 1;
|
std::advance(wordNodeIt, currentIndex);
|
||||||
const size_t lineWordCount = lineBreak - lastBreakAt;
|
std::advance(styleNodeIt, currentIndex);
|
||||||
|
if (wordNodeIt == words.end()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
int lineWordWidthSum = 0;
|
const int availableWidth = pageWidth - lineWidth - interWordSpace;
|
||||||
for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
|
HyphenSplitDecision decision;
|
||||||
lineWordWidthSum += wordWidths[idx];
|
if (!chooseSplitForWidth(renderer, fontId, *wordNodeIt, *styleNodeIt, availableWidth, false, &decision)) {
|
||||||
}
|
break;
|
||||||
lastBreakAt = lineBreak;
|
}
|
||||||
|
|
||||||
if (isLastLine || lineBreak >= wordWidths.size()) {
|
const std::string originalWord = *wordNodeIt;
|
||||||
continue;
|
const std::string tail = originalWord.substr(decision.byteOffset);
|
||||||
}
|
if (tail.empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
const std::string prefix = originalWord.substr(0, decision.byteOffset) + "-";
|
||||||
|
|
||||||
const size_t spacingCount = lineWordCount > 0 ? lineWordCount - 1 : 0;
|
const EpdFontStyle styleForSplit = *styleNodeIt;
|
||||||
const int usedSpace = lineWordWidthSum + static_cast<int>(spacingCount) * spaceWidth;
|
*wordNodeIt = tail;
|
||||||
const int unusedWidth = pageWidth - usedSpace;
|
words.insert(wordNodeIt, prefix);
|
||||||
const int spaceNeeded = lineWordCount == 0 ? 0 : spaceWidth;
|
wordStyles.insert(styleNodeIt, styleForSplit);
|
||||||
const int budgetForPrefix = unusedWidth - spaceNeeded;
|
|
||||||
if (budgetForPrefix <= 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto nextWordIt = words.begin();
|
const uint16_t tailWidth = renderer.getTextWidth(fontId, tail.c_str(), styleForSplit);
|
||||||
auto nextStyleIt = wordStyles.begin();
|
wordWidths.insert(wordWidths.begin() + currentIndex, decision.prefixWidth);
|
||||||
std::advance(nextWordIt, lineBreak);
|
wordWidths[currentIndex + 1] = tailWidth;
|
||||||
std::advance(nextStyleIt, lineBreak);
|
|
||||||
|
|
||||||
if (nextWordIt == words.end()) {
|
lineWidth += interWordSpace + decision.prefixWidth;
|
||||||
|
++currentIndex;
|
||||||
|
++wordsOnLine;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
HyphenSplitDecision decision;
|
if (wordsOnLine == 0) {
|
||||||
if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, &decision)) {
|
++currentIndex;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const EpdFontStyle styleForSplit = *nextStyleIt;
|
continueLayout = appendLineBreak(currentIndex);
|
||||||
const std::string originalWord = *nextWordIt;
|
|
||||||
const std::string prefix = originalWord.substr(0, decision.byteOffset) + "-";
|
|
||||||
const std::string tail = originalWord.substr(decision.byteOffset);
|
|
||||||
if (tail.empty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
*nextWordIt = tail;
|
|
||||||
words.insert(nextWordIt, prefix);
|
|
||||||
wordStyles.insert(nextStyleIt, styleForSplit);
|
|
||||||
|
|
||||||
const uint16_t tailWidth = renderer.getTextWidth(fontId, tail.c_str(), styleForSplit);
|
|
||||||
wordWidths.insert(wordWidths.begin() + lineBreak, decision.prefixWidth);
|
|
||||||
wordWidths[lineBreak + 1] = tailWidth;
|
|
||||||
|
|
||||||
shiftGuardIndices(lineBreak);
|
|
||||||
guards.push_back({lineBreak, lineBreak + 1});
|
|
||||||
insertedSplit = true;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if (!insertedSplit) {
|
if (hyphenationEnabled) {
|
||||||
return lineBreakIndices;
|
// Simple greedy layout with hyphenation
|
||||||
}
|
runHyphenatedLayout();
|
||||||
|
} else {
|
||||||
|
// TeX-like optimal layout without hyphenation
|
||||||
|
runDp();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return lineBreakIndices;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
|
void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
|
||||||
@ -314,23 +287,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
|||||||
const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
|
const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
|
||||||
const size_t lineWordCount = lineBreak - lastBreakAt;
|
const size_t lineWordCount = lineBreak - lastBreakAt;
|
||||||
|
|
||||||
// Calculate total word width for this line
|
|
||||||
int lineWordWidthSum = 0;
|
int lineWordWidthSum = 0;
|
||||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
|
||||||
lineWordWidthSum += wordWidths[i];
|
lineWordWidthSum += wordWidths[idx];
|
||||||
}
|
}
|
||||||
|
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
|
||||||
|
|
||||||
// Calculate spacing
|
|
||||||
const int spareSpace = pageWidth - lineWordWidthSum;
|
const int spareSpace = pageWidth - lineWordWidthSum;
|
||||||
|
|
||||||
int spacing = spaceWidth;
|
int spacing = spaceWidth;
|
||||||
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
|
|
||||||
|
|
||||||
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
|
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
|
||||||
spacing = spareSpace / (lineWordCount - 1);
|
spacing = spareSpace / (lineWordCount - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate initial x position
|
|
||||||
uint16_t xpos = 0;
|
uint16_t xpos = 0;
|
||||||
if (style == TextBlock::RIGHT_ALIGN) {
|
if (style == TextBlock::RIGHT_ALIGN) {
|
||||||
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
|
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
|
||||||
@ -338,7 +307,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
|||||||
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
|
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pre-calculate X positions for words
|
|
||||||
std::list<uint16_t> lineXPos;
|
std::list<uint16_t> lineXPos;
|
||||||
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
for (size_t i = lastBreakAt; i < lineBreak; i++) {
|
||||||
const uint16_t currentWordWidth = wordWidths[i];
|
const uint16_t currentWordWidth = wordWidths[i];
|
||||||
@ -346,13 +314,11 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
|
|||||||
xpos += currentWordWidth + spacing;
|
xpos += currentWordWidth + spacing;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterators always start at the beginning as we are moving content with splice below
|
|
||||||
auto wordEndIt = words.begin();
|
auto wordEndIt = words.begin();
|
||||||
auto wordStyleEndIt = wordStyles.begin();
|
auto wordStyleEndIt = wordStyles.begin();
|
||||||
std::advance(wordEndIt, lineWordCount);
|
std::advance(wordEndIt, lineWordCount);
|
||||||
std::advance(wordStyleEndIt, lineWordCount);
|
std::advance(wordStyleEndIt, lineWordCount);
|
||||||
|
|
||||||
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
|
|
||||||
std::list<std::string> lineWords;
|
std::list<std::string> lineWords;
|
||||||
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
|
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
|
||||||
std::list<EpdFontStyle> lineWordStyles;
|
std::list<EpdFontStyle> lineWordStyles;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user