Refactor hyphenation logic to return detailed break information, enhancing line breaking capabilities

This commit is contained in:
Arthur Tazhitdinov 2026-01-07 03:54:43 +05:00
parent f998180353
commit 2315513ca1
3 changed files with 34 additions and 20 deletions

View File

@ -199,7 +199,8 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
return lineBreakIndices;
}
// Splits words[wordIndex] into prefix+hyphen and remainder when a legal breakpoint fits the available width.
// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint fits the
// available width.
bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
const int fontId, std::vector<uint16_t>& wordWidths,
const bool allowFallbackBreaks) {
@ -212,22 +213,27 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
std::advance(wordIt, wordIndex);
std::advance(styleIt, wordIndex);
const auto breakOffsets = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks);
if (breakOffsets.empty()) {
const auto breakInfos = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks);
if (breakInfos.empty()) {
return false;
}
const auto style = *styleIt;
size_t chosenOffset = 0;
int chosenWidth = -1;
bool chosenNeedsHyphen = true;
for (const size_t offset : breakOffsets) {
for (const auto& info : breakInfos) {
const size_t offset = info.byteOffset;
if (offset == 0 || offset >= wordIt->size()) {
continue;
}
const bool needsHyphen = info.requiresInsertedHyphen;
std::string prefix = wordIt->substr(0, offset);
prefix.push_back('-');
if (needsHyphen) {
prefix.push_back('-');
}
const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style);
if (prefixWidth > availableWidth) {
continue;
@ -236,6 +242,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
if (prefixWidth > chosenWidth) {
chosenWidth = prefixWidth;
chosenOffset = offset;
chosenNeedsHyphen = needsHyphen;
}
}
@ -245,7 +252,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
std::string remainder = wordIt->substr(chosenOffset);
wordIt->resize(chosenOffset);
wordIt->push_back('-');
if (chosenNeedsHyphen) {
wordIt->push_back('-');
}
auto insertWordIt = std::next(wordIt);
auto insertStyleIt = std::next(styleIt);

View File

@ -135,9 +135,20 @@ size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t in
return cps[index].byteOffset;
}
std::vector<Hyphenator::BreakInfo> buildBreakInfoVector(const std::vector<size_t>& indexes,
const std::vector<CodepointInfo>& cps,
const bool requiresHyphen) {
std::vector<Hyphenator::BreakInfo> breaks;
breaks.reserve(indexes.size());
for (const size_t idx : indexes) {
breaks.push_back({byteOffsetForIndex(cps, idx), requiresHyphen});
}
return breaks;
}
} // namespace
std::vector<size_t> Hyphenator::breakOffsets(const std::string& word, const bool includeFallback) {
std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& word, const bool includeFallback) {
if (word.empty()) {
return {};
}
@ -153,12 +164,7 @@ std::vector<size_t> Hyphenator::breakOffsets(const std::string& word, const bool
if (!explicitIndexes.empty()) {
std::sort(explicitIndexes.begin(), explicitIndexes.end());
explicitIndexes.erase(std::unique(explicitIndexes.begin(), explicitIndexes.end()), explicitIndexes.end());
std::vector<size_t> byteOffsets;
byteOffsets.reserve(explicitIndexes.size());
for (const size_t idx : explicitIndexes) {
byteOffsets.push_back(byteOffsetForIndex(cps, idx));
}
return byteOffsets;
return buildBreakInfoVector(explicitIndexes, cps, false);
}
std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector<size_t>();
@ -175,10 +181,5 @@ std::vector<size_t> Hyphenator::breakOffsets(const std::string& word, const bool
std::sort(indexes.begin(), indexes.end());
indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
std::vector<size_t> byteOffsets;
byteOffsets.reserve(indexes.size());
for (const size_t idx : indexes) {
byteOffsets.push_back(byteOffsetForIndex(cps, idx));
}
return byteOffsets;
return buildBreakInfoVector(indexes, cps, true);
}

View File

@ -6,7 +6,11 @@
class Hyphenator {
public:
// Describes a single candidate break position inside a word.
struct BreakInfo {
// Byte offset into the word at which it may be split; the prefix is the first byteOffset bytes of the word.
size_t byteOffset;
// True when a '-' has to be appended to the prefix at this break; false when the break needs no added hyphen.
bool requiresInsertedHyphen;
};
// Returns the candidate positions where the word may be hyphenated. When includeFallback is true, all positions
// obeying the minimum prefix/suffix constraints are returned even if no language-specific rule matches.
static std::vector<size_t> breakOffsets(const std::string& word, bool includeFallback);
static std::vector<BreakInfo> breakOffsets(const std::string& word, bool includeFallback);
};