mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-06 07:37:37 +03:00
Refactor hyphenation logic to return detailed break information, enhancing line breaking capabilities
This commit is contained in:
parent
f998180353
commit
2315513ca1
@ -199,7 +199,8 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
|
||||
return lineBreakIndices;
|
||||
}
|
||||
|
||||
// Splits words[wordIndex] into prefix+hyphen and remainder when a legal breakpoint fits the available width.
|
||||
// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint fits the
|
||||
// available width.
|
||||
bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
|
||||
const int fontId, std::vector<uint16_t>& wordWidths,
|
||||
const bool allowFallbackBreaks) {
|
||||
@ -212,22 +213,27 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
||||
std::advance(wordIt, wordIndex);
|
||||
std::advance(styleIt, wordIndex);
|
||||
|
||||
const auto breakOffsets = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks);
|
||||
if (breakOffsets.empty()) {
|
||||
const auto breakInfos = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks);
|
||||
if (breakInfos.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto style = *styleIt;
|
||||
size_t chosenOffset = 0;
|
||||
int chosenWidth = -1;
|
||||
bool chosenNeedsHyphen = true;
|
||||
|
||||
for (const size_t offset : breakOffsets) {
|
||||
for (const auto& info : breakInfos) {
|
||||
const size_t offset = info.byteOffset;
|
||||
if (offset == 0 || offset >= wordIt->size()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool needsHyphen = info.requiresInsertedHyphen;
|
||||
std::string prefix = wordIt->substr(0, offset);
|
||||
prefix.push_back('-');
|
||||
if (needsHyphen) {
|
||||
prefix.push_back('-');
|
||||
}
|
||||
const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style);
|
||||
if (prefixWidth > availableWidth) {
|
||||
continue;
|
||||
@ -236,6 +242,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
||||
if (prefixWidth > chosenWidth) {
|
||||
chosenWidth = prefixWidth;
|
||||
chosenOffset = offset;
|
||||
chosenNeedsHyphen = needsHyphen;
|
||||
}
|
||||
}
|
||||
|
||||
@ -245,7 +252,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
|
||||
|
||||
std::string remainder = wordIt->substr(chosenOffset);
|
||||
wordIt->resize(chosenOffset);
|
||||
wordIt->push_back('-');
|
||||
if (chosenNeedsHyphen) {
|
||||
wordIt->push_back('-');
|
||||
}
|
||||
|
||||
auto insertWordIt = std::next(wordIt);
|
||||
auto insertStyleIt = std::next(styleIt);
|
||||
|
||||
@ -135,9 +135,20 @@ size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t in
|
||||
return cps[index].byteOffset;
|
||||
}
|
||||
|
||||
std::vector<Hyphenator::BreakInfo> buildBreakInfoVector(const std::vector<size_t>& indexes,
|
||||
const std::vector<CodepointInfo>& cps,
|
||||
const bool requiresHyphen) {
|
||||
std::vector<Hyphenator::BreakInfo> breaks;
|
||||
breaks.reserve(indexes.size());
|
||||
for (const size_t idx : indexes) {
|
||||
breaks.push_back({byteOffsetForIndex(cps, idx), requiresHyphen});
|
||||
}
|
||||
return breaks;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::vector<size_t> Hyphenator::breakOffsets(const std::string& word, const bool includeFallback) {
|
||||
std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& word, const bool includeFallback) {
|
||||
if (word.empty()) {
|
||||
return {};
|
||||
}
|
||||
@ -153,12 +164,7 @@ std::vector<size_t> Hyphenator::breakOffsets(const std::string& word, const bool
|
||||
if (!explicitIndexes.empty()) {
|
||||
std::sort(explicitIndexes.begin(), explicitIndexes.end());
|
||||
explicitIndexes.erase(std::unique(explicitIndexes.begin(), explicitIndexes.end()), explicitIndexes.end());
|
||||
std::vector<size_t> byteOffsets;
|
||||
byteOffsets.reserve(explicitIndexes.size());
|
||||
for (const size_t idx : explicitIndexes) {
|
||||
byteOffsets.push_back(byteOffsetForIndex(cps, idx));
|
||||
}
|
||||
return byteOffsets;
|
||||
return buildBreakInfoVector(explicitIndexes, cps, false);
|
||||
}
|
||||
|
||||
std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector<size_t>();
|
||||
@ -175,10 +181,5 @@ std::vector<size_t> Hyphenator::breakOffsets(const std::string& word, const bool
|
||||
std::sort(indexes.begin(), indexes.end());
|
||||
indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
|
||||
|
||||
std::vector<size_t> byteOffsets;
|
||||
byteOffsets.reserve(indexes.size());
|
||||
for (const size_t idx : indexes) {
|
||||
byteOffsets.push_back(byteOffsetForIndex(cps, idx));
|
||||
}
|
||||
return byteOffsets;
|
||||
return buildBreakInfoVector(indexes, cps, true);
|
||||
}
|
||||
|
||||
@ -6,7 +6,11 @@
|
||||
|
||||
class Hyphenator {
 public:
  // Describes one position at which a word may be split across lines.
  struct BreakInfo {
    // Byte offset into the word; the prefix is word.substr(0, byteOffset).
    size_t byteOffset;
    // True when the caller must append a '-' to the prefix after splitting at this position; false when no
    // hyphen should be added.
    bool requiresInsertedHyphen;
  };

  // Returns the positions where the word may be hyphenated, each paired with whether a hyphen has to be inserted
  // at that break. When includeFallback is true, all positions obeying the minimum prefix/suffix constraints are
  // returned even if no language-specific rule matches. An empty word yields an empty vector.
  static std::vector<BreakInfo> breakOffsets(const std::string& word, bool includeFallback);
};
|
||||
Loading…
Reference in New Issue
Block a user