diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 305ba1a2..06803755 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -199,7 +199,8 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r return lineBreakIndices; } -// Splits words[wordIndex] into prefix+hyphen and remainder when a legal breakpoint fits the available width. +// Splits words[wordIndex] into prefix (adding a hyphen only when needed) and remainder when a legal breakpoint fits the +// available width. bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer, const int fontId, std::vector& wordWidths, const bool allowFallbackBreaks) { @@ -212,22 +213,27 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl std::advance(wordIt, wordIndex); std::advance(styleIt, wordIndex); - const auto breakOffsets = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks); - if (breakOffsets.empty()) { + const auto breakInfos = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks); + if (breakInfos.empty()) { return false; } const auto style = *styleIt; size_t chosenOffset = 0; int chosenWidth = -1; + bool chosenNeedsHyphen = true; - for (const size_t offset : breakOffsets) { + for (const auto& info : breakInfos) { + const size_t offset = info.byteOffset; if (offset == 0 || offset >= wordIt->size()) { continue; } + const bool needsHyphen = info.requiresInsertedHyphen; std::string prefix = wordIt->substr(0, offset); - prefix.push_back('-'); + if (needsHyphen) { + prefix.push_back('-'); + } const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style); if (prefixWidth > availableWidth) { continue; @@ -236,6 +242,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl if (prefixWidth > chosenWidth) { chosenWidth = prefixWidth; chosenOffset = offset; + chosenNeedsHyphen = needsHyphen; } } @@ -245,7 +252,9 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl std::string remainder = wordIt->substr(chosenOffset); wordIt->resize(chosenOffset); - wordIt->push_back('-'); + if (chosenNeedsHyphen) { + wordIt->push_back('-'); + } auto insertWordIt = std::next(wordIt); auto insertStyleIt = std::next(styleIt); diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp index 8b0d5138..21d890b7 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp +++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp @@ -135,9 +135,20 @@ size_t byteOffsetForIndex(const std::vector& cps, const size_t in return cps[index].byteOffset; } +std::vector buildBreakInfoVector(const std::vector& indexes, + const std::vector& cps, + const bool requiresHyphen) { + std::vector breaks; + breaks.reserve(indexes.size()); + for (const size_t idx : indexes) { + breaks.push_back({byteOffsetForIndex(cps, idx), requiresHyphen}); + } + return breaks; +} + } // namespace -std::vector Hyphenator::breakOffsets(const std::string& word, const bool includeFallback) { +std::vector Hyphenator::breakOffsets(const std::string& word, const bool includeFallback) { if (word.empty()) { return {}; } @@ -153,12 +164,7 @@ std::vector Hyphenator::breakOffsets(const std::string& word, const bool if (!explicitIndexes.empty()) { std::sort(explicitIndexes.begin(), explicitIndexes.end()); explicitIndexes.erase(std::unique(explicitIndexes.begin(), explicitIndexes.end()), explicitIndexes.end()); - std::vector byteOffsets; - byteOffsets.reserve(explicitIndexes.size()); - for (const size_t idx : explicitIndexes) { - byteOffsets.push_back(byteOffsetForIndex(cps, idx)); - } - return byteOffsets; + return buildBreakInfoVector(explicitIndexes, cps, false); } std::vector indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector(); @@ -175,10 +181,5 @@ std::vector Hyphenator::breakOffsets(const std::string& word, const bool std::sort(indexes.begin(), indexes.end()); indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end()); - std::vector byteOffsets; - byteOffsets.reserve(indexes.size()); - for (const size_t idx : indexes) { - byteOffsets.push_back(byteOffsetForIndex(cps, idx)); - } - return byteOffsets; + return buildBreakInfoVector(indexes, cps, true); } diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.h b/lib/Epub/Epub/hyphenation/Hyphenator.h index ba0319df..3d1ed040 100644 --- a/lib/Epub/Epub/hyphenation/Hyphenator.h +++ b/lib/Epub/Epub/hyphenation/Hyphenator.h @@ -6,7 +6,11 @@ class Hyphenator { public: + struct BreakInfo { + size_t byteOffset; + bool requiresInsertedHyphen; + }; // Returns byte offsets where the word may be hyphenated. When includeFallback is true, all positions obeying the // minimum prefix/suffix constraints are returned even if no language-specific rule matches. - static std::vector breakOffsets(const std::string& word, bool includeFallback); + static std::vector breakOffsets(const std::string& word, bool includeFallback); }; \ No newline at end of file