optimization

This commit is contained in:
Arthur Tazhitdinov 2026-01-08 03:01:36 +05:00
parent 34d2b81f40
commit 58314e9efd
4 changed files with 45 additions and 68 deletions

View File

@ -207,28 +207,29 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
const int spacing = isFirstWord ? 0 : spaceWidth; const int spacing = isFirstWord ? 0 : spaceWidth;
const int candidateWidth = spacing + wordWidths[currentIndex]; const int candidateWidth = spacing + wordWidths[currentIndex];
// Word fits on current line
if (lineWidth + candidateWidth <= pageWidth) { if (lineWidth + candidateWidth <= pageWidth) {
lineWidth += candidateWidth; lineWidth += candidateWidth;
currentIndex += 1; ++currentIndex;
continue; continue;
} }
// Word would overflow — try to split based on hyphenation points // Word would overflow — try to split based on hyphenation points
const int availableWidth = pageWidth - lineWidth - spacing; const int availableWidth = pageWidth - lineWidth - spacing;
const bool allowFallbackBreaks = const bool allowFallbackBreaks = isFirstWord; // Only for first word on line
isFirstWord; // Permit fallback breaks only when first word on the line still overflows
if (availableWidth > 0 && if (availableWidth > 0 &&
hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) { hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) {
// Prefix now fits; append it to this line and immediately move to the next line // Prefix now fits; append it to this line and move to next line
lineWidth += spacing + wordWidths[currentIndex]; lineWidth += spacing + wordWidths[currentIndex];
currentIndex += 1; ++currentIndex;
break; break;
} }
// Could not split: force at least one word per line to avoid infinite loop // Could not split: force at least one word per line to avoid infinite loop
if (currentIndex == lineStart) { if (currentIndex == lineStart) {
lineWidth += candidateWidth; lineWidth += candidateWidth;
currentIndex += 1; ++currentIndex;
} }
break; break;
} }
@ -249,19 +250,21 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
return false; return false;
} }
// Position iterators at the target word and its style entry. // Get iterators to target word and style.
auto wordIt = words.begin(); auto wordIt = words.begin();
auto styleIt = wordStyles.begin(); auto styleIt = wordStyles.begin();
std::advance(wordIt, wordIndex); std::advance(wordIt, wordIndex);
std::advance(styleIt, wordIndex); std::advance(styleIt, wordIndex);
const std::string& word = *wordIt;
const auto style = *styleIt;
// Collect candidate breakpoints (byte offsets and hyphen requirements). // Collect candidate breakpoints (byte offsets and hyphen requirements).
const auto breakInfos = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks); const auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
if (breakInfos.empty()) { if (breakInfos.empty()) {
return false; return false;
} }
const auto style = *styleIt;
size_t chosenOffset = 0; size_t chosenOffset = 0;
int chosenWidth = -1; int chosenWidth = -1;
bool chosenNeedsHyphen = true; bool chosenNeedsHyphen = true;
@ -269,22 +272,19 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
// Iterate over each legal breakpoint and retain the widest prefix that still fits. // Iterate over each legal breakpoint and retain the widest prefix that still fits.
for (const auto& info : breakInfos) { for (const auto& info : breakInfos) {
const size_t offset = info.byteOffset; const size_t offset = info.byteOffset;
if (offset == 0 || offset >= wordIt->size()) { if (offset == 0 || offset >= word.size()) {
continue; continue;
} }
const bool needsHyphen = info.requiresInsertedHyphen; const bool needsHyphen = info.requiresInsertedHyphen;
std::string prefix = wordIt->substr(0, offset); const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen);
const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen); if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) {
if (prefixWidth > availableWidth) { continue; // Skip if too wide or not an improvement
continue;
} }
if (prefixWidth > chosenWidth) { chosenWidth = prefixWidth;
chosenWidth = prefixWidth; chosenOffset = offset;
chosenOffset = offset; chosenNeedsHyphen = needsHyphen;
chosenNeedsHyphen = needsHyphen;
}
} }
if (chosenWidth < 0) { if (chosenWidth < 0) {
@ -293,7 +293,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
} }
// Split the word at the selected breakpoint and append a hyphen if required. // Split the word at the selected breakpoint and append a hyphen if required.
std::string remainder = wordIt->substr(chosenOffset); std::string remainder = word.substr(chosenOffset);
wordIt->resize(chosenOffset); wordIt->resize(chosenOffset);
if (chosenNeedsHyphen) { if (chosenNeedsHyphen) {
wordIt->push_back('-'); wordIt->push_back('-');

View File

@ -59,8 +59,6 @@ bool isCyrillicConsonant(const uint32_t cp) { return isCyrillicLetter(cp) && !is
bool isAlphabetic(const uint32_t cp) { return isLatinLetter(cp) || isCyrillicLetter(cp); } bool isAlphabetic(const uint32_t cp) { return isLatinLetter(cp) || isCyrillicLetter(cp); }
bool isVowel(const uint32_t cp) { return isLatinVowel(cp) || isCyrillicVowel(cp); }
bool isPunctuation(const uint32_t cp) { bool isPunctuation(const uint32_t cp) {
switch (cp) { switch (cp) {
case '.': case '.':

View File

@ -25,7 +25,6 @@ bool isCyrillicVowel(uint32_t cp);
bool isCyrillicConsonant(uint32_t cp); bool isCyrillicConsonant(uint32_t cp);
bool isAlphabetic(uint32_t cp); bool isAlphabetic(uint32_t cp);
bool isVowel(uint32_t cp);
bool isPunctuation(uint32_t cp); bool isPunctuation(uint32_t cp);
bool isAsciiDigit(uint32_t cp); bool isAsciiDigit(uint32_t cp);
bool isExplicitHyphen(uint32_t cp); bool isExplicitHyphen(uint32_t cp);

View File

@ -3,7 +3,6 @@
#include <Utf8.h> #include <Utf8.h>
#include <algorithm> #include <algorithm>
#include <array>
#include <vector> #include <vector>
#include "EnglishHyphenator.h" #include "EnglishHyphenator.h"
@ -32,12 +31,6 @@ const LanguageHyphenator* hyphenatorForLanguage(const std::string& langTag) {
return nullptr; return nullptr;
} }
// Preferred language hint; empty means "auto".
std::string& preferredLanguage() {
static std::string lang;
return lang;
}
// Cached hyphenator instance for the current preferred language. // Cached hyphenator instance for the current preferred language.
const LanguageHyphenator*& cachedHyphenator() { const LanguageHyphenator*& cachedHyphenator() {
static const LanguageHyphenator* hyphenator = nullptr; static const LanguageHyphenator* hyphenator = nullptr;
@ -86,67 +79,54 @@ void trimTrailingFootnoteReference(std::vector<CodepointInfo>& cps) {
// Asks the language hyphenator for legal break positions inside the word. // Asks the language hyphenator for legal break positions inside the word.
std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps) { std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps) {
if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
return {};
}
if (const auto* hyphenator = cachedHyphenator()) { if (const auto* hyphenator = cachedHyphenator()) {
auto indexes = hyphenator->breakIndexes(cps); return hyphenator->breakIndexes(cps);
return indexes;
} }
return {}; return {};
} }
// Maps a codepoint index back to its byte offset inside the source word. // Maps a codepoint index back to its byte offset inside the source word.
size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t index) { size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t index) {
if (index >= cps.size()) { return (index < cps.size()) ? cps[index].byteOffset : (cps.empty() ? 0 : cps.back().byteOffset);
return cps.empty() ? 0 : cps.back().byteOffset;
}
return cps[index].byteOffset;
} }
// Builds a vector of break information from explicit hyphen markers in the given codepoints. // Builds a vector of break information from explicit hyphen markers in the given codepoints.
std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) { std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) {
std::vector<Hyphenator::BreakInfo> breaks; std::vector<Hyphenator::BreakInfo> breaks;
breaks.reserve(cps.size());
// Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters. // Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters.
for (size_t i = 0; i < cps.size(); ++i) { for (size_t i = 1; i + 1 < cps.size(); ++i) {
const uint32_t cp = cps[i].value; const uint32_t cp = cps[i].value;
if (!isExplicitHyphen(cp) || i == 0 || i + 1 >= cps.size()) { if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
continue; // Need at least one alphabetic character on both sides.
}
if (!isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
continue; continue;
} }
// Offset points to the next codepoint so rendering starts after the hyphen marker. // Offset points to the next codepoint so rendering starts after the hyphen marker.
breaks.push_back({byteOffsetForIndex(cps, i + 1), isSoftHyphen(cp)}); breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
} }
if (breaks.empty()) { if (breaks.empty()) {
return breaks; return breaks;
} }
// Sort by byte offset so we can deduplicate sequential markers. // Sort by byte offset so we can deduplicate sequential markers in-place.
// Multiple dash codepoints can point to the same byte offset once punctuation is trimmed; sort before merging.
std::sort(breaks.begin(), breaks.end(), [](const Hyphenator::BreakInfo& lhs, const Hyphenator::BreakInfo& rhs) { std::sort(breaks.begin(), breaks.end(), [](const Hyphenator::BreakInfo& lhs, const Hyphenator::BreakInfo& rhs) {
return lhs.byteOffset < rhs.byteOffset; return lhs.byteOffset < rhs.byteOffset;
}); });
// Ensure we keep a single entry per break while retaining the "needs hyphen" flag when any marker requested it. // Deduplicate in-place: merge entries at same offset while retaining "needs hyphen" flag.
std::vector<Hyphenator::BreakInfo> deduped; size_t writePos = 0;
deduped.reserve(breaks.size()); for (size_t readPos = 1; readPos < breaks.size(); ++readPos) {
for (const auto& entry : breaks) { if (breaks[readPos].byteOffset == breaks[writePos].byteOffset) {
if (!deduped.empty() && deduped.back().byteOffset == entry.byteOffset) { // Merge: explicit hyphen wins over soft hyphen at same offset.
// Merge entries so that an explicit hyphen wins over a soft hyphen at the same offset. breaks[writePos].requiresInsertedHyphen =
deduped.back().requiresInsertedHyphen = deduped.back().requiresInsertedHyphen || entry.requiresInsertedHyphen; breaks[writePos].requiresInsertedHyphen || breaks[readPos].requiresInsertedHyphen;
} else { } else {
deduped.push_back(entry); breaks[++writePos] = breaks[readPos];
} }
} }
breaks.resize(writePos + 1);
return deduped; return breaks;
} }
} // namespace } // namespace
@ -170,22 +150,25 @@ std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& w
return explicitBreakInfos; return explicitBreakInfos;
} }
// Ask language hyphenator for legal break points, optionally augment with naive fallback. // Ask language hyphenator for legal break points.
std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector<size_t>(); std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector<size_t>();
// When fallback is requested, append every index that keeps MIN_PREFIX_CP/MIN_SUFFIX_CP codepoints on each side, then sort and deduplicate the combined list.
if (includeFallback) { if (includeFallback) {
for (size_t idx = MIN_PREFIX_CP; idx + MIN_SUFFIX_CP <= cps.size(); ++idx) { for (size_t idx = MIN_PREFIX_CP; idx + MIN_SUFFIX_CP <= cps.size(); ++idx) {
indexes.push_back(idx); indexes.push_back(idx);
} }
// Fallback indexes can repeat language-specific ones, so sort and deduplicate the combined list.
std::sort(indexes.begin(), indexes.end());
indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
} else if (indexes.empty()) {
return {};
} }
if (indexes.empty()) { if (indexes.empty()) {
return {}; return {};
} }
// Sort/deduplicate break indexes before converting them back to byte offsets.
std::sort(indexes.begin(), indexes.end());
indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
std::vector<Hyphenator::BreakInfo> breaks; std::vector<Hyphenator::BreakInfo> breaks;
breaks.reserve(indexes.size()); breaks.reserve(indexes.size());
for (const size_t idx : indexes) { for (const size_t idx : indexes) {
@ -195,7 +178,4 @@ std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& w
return breaks; return breaks;
} }
void Hyphenator::setPreferredLanguage(const std::string& lang) { void Hyphenator::setPreferredLanguage(const std::string& lang) { cachedHyphenator() = hyphenatorForLanguage(lang); }
preferredLanguage() = lang;
cachedHyphenator() = hyphenatorForLanguage(lang);
}