From 58314e9efdebd47f5704cfcc9f764f0ac937cb3b Mon Sep 17 00:00:00 2001
From: Arthur Tazhitdinov <lisnake@gmail.com>
Date: Thu, 8 Jan 2026 03:01:36 +0500
Subject: [PATCH] Simplify hyphenation helpers and hyphenated line breaking

---
 lib/Epub/Epub/ParsedText.cpp                  | 40 +++++------
 .../Epub/hyphenation/HyphenationCommon.cpp    |  2 -
 lib/Epub/Epub/hyphenation/HyphenationCommon.h |  1 -
 lib/Epub/Epub/hyphenation/Hyphenator.cpp      | 70 +++++++------------
 4 files changed, 45 insertions(+), 68 deletions(-)

diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
index 8e3d2f30..12681915 100644
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@@ -207,28 +207,29 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
       const int spacing = isFirstWord ? 0 : spaceWidth;
       const int candidateWidth = spacing + wordWidths[currentIndex];
 
+      // Word fits on current line
       if (lineWidth + candidateWidth <= pageWidth) {
         lineWidth += candidateWidth;
-        currentIndex += 1;
+        ++currentIndex;
         continue;
       }
 
       // Word would overflow — try to split based on hyphenation points
       const int availableWidth = pageWidth - lineWidth - spacing;
-      const bool allowFallbackBreaks =
-          isFirstWord;  // Permit fallback breaks only when first word one the line still overflows
+      const bool allowFallbackBreaks = isFirstWord;  // Only for first word on line
+
       if (availableWidth > 0 &&
           hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths, allowFallbackBreaks)) {
-        // Prefix now fits; append it to this line and immediately move to the next line
+        // Prefix now fits; append it to this line and move to next line
         lineWidth += spacing + wordWidths[currentIndex];
-        currentIndex += 1;
+        ++currentIndex;
         break;
       }
 
       // Could not split: force at least one word per line to avoid infinite loop
       if (currentIndex == lineStart) {
         lineWidth += candidateWidth;
-        currentIndex += 1;
+        ++currentIndex;
       }
       break;
     }
@@ -249,19 +250,21 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
     return false;
   }
 
-  // Position iterators at the target word and its style entry.
+  // Get iterators to target word and style.
   auto wordIt = words.begin();
   auto styleIt = wordStyles.begin();
   std::advance(wordIt, wordIndex);
   std::advance(styleIt, wordIndex);
 
+  const std::string& word = *wordIt;
+  const auto style = *styleIt;
+
   // Collect candidate breakpoints (byte offsets and hyphen requirements).
-  const auto breakInfos = Hyphenator::breakOffsets(*wordIt, allowFallbackBreaks);
+  const auto breakInfos = Hyphenator::breakOffsets(word, allowFallbackBreaks);
   if (breakInfos.empty()) {
     return false;
   }
 
-  const auto style = *styleIt;
   size_t chosenOffset = 0;
   int chosenWidth = -1;
   bool chosenNeedsHyphen = true;
@@ -269,22 +272,19 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
   // Iterate over each legal breakpoint and retain the widest prefix that still fits.
   for (const auto& info : breakInfos) {
     const size_t offset = info.byteOffset;
-    if (offset == 0 || offset >= wordIt->size()) {
+    if (offset == 0 || offset >= word.size()) {
       continue;
     }
 
     const bool needsHyphen = info.requiresInsertedHyphen;
-    std::string prefix = wordIt->substr(0, offset);
-    const int prefixWidth = measureWordWidth(renderer, fontId, prefix, style, needsHyphen);
-    if (prefixWidth > availableWidth) {
-      continue;
+    const int prefixWidth = measureWordWidth(renderer, fontId, word.substr(0, offset), style, needsHyphen);
+    if (prefixWidth > availableWidth || prefixWidth <= chosenWidth) {
+      continue;  // Skip if too wide or not an improvement
     }
 
-    if (prefixWidth > chosenWidth) {
-      chosenWidth = prefixWidth;
-      chosenOffset = offset;
-      chosenNeedsHyphen = needsHyphen;
-    }
+    chosenWidth = prefixWidth;
+    chosenOffset = offset;
+    chosenNeedsHyphen = needsHyphen;
   }
 
   if (chosenWidth < 0) {
@@ -293,7 +293,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
   }
 
   // Split the word at the selected breakpoint and append a hyphen if required.
-  std::string remainder = wordIt->substr(chosenOffset);
+  std::string remainder = word.substr(chosenOffset);
   wordIt->resize(chosenOffset);
   if (chosenNeedsHyphen) {
     wordIt->push_back('-');
diff --git a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
index a18361c3..541f6c34 100644
--- a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
+++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
@@ -59,8 +59,6 @@ bool isCyrillicConsonant(const uint32_t cp) { return isCyrillicLetter(cp) && !is
 
 bool isAlphabetic(const uint32_t cp) { return isLatinLetter(cp) || isCyrillicLetter(cp); }
 
-bool isVowel(const uint32_t cp) { return isLatinVowel(cp) || isCyrillicVowel(cp); }
-
 bool isPunctuation(const uint32_t cp) {
   switch (cp) {
     case '.':
diff --git a/lib/Epub/Epub/hyphenation/HyphenationCommon.h b/lib/Epub/Epub/hyphenation/HyphenationCommon.h
index 0e68ef04..927ac4c3 100644
--- a/lib/Epub/Epub/hyphenation/HyphenationCommon.h
+++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.h
@@ -25,7 +25,6 @@ bool isCyrillicVowel(uint32_t cp);
 bool isCyrillicConsonant(uint32_t cp);
 
 bool isAlphabetic(uint32_t cp);
-bool isVowel(uint32_t cp);
 bool isPunctuation(uint32_t cp);
 bool isAsciiDigit(uint32_t cp);
 bool isExplicitHyphen(uint32_t cp);
diff --git a/lib/Epub/Epub/hyphenation/Hyphenator.cpp b/lib/Epub/Epub/hyphenation/Hyphenator.cpp
index 4f22cafc..09d0ab66 100644
--- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp
@@ -3,7 +3,6 @@
 #include <Utf8.h>
 
 #include <algorithm>
-#include <array>
 #include <vector>
 
 #include "EnglishHyphenator.h"
@@ -32,12 +31,6 @@ const LanguageHyphenator* hyphenatorForLanguage(const std::string& langTag) {
   return nullptr;
 }
 
-// Preferred language hint; empty means "auto".
-std::string& preferredLanguage() {
-  static std::string lang;
-  return lang;
-}
-
 // Cached hyphenator instance for the current preferred language.
 const LanguageHyphenator*& cachedHyphenator() {
   static const LanguageHyphenator* hyphenator = nullptr;
@@ -86,67 +79,54 @@ void trimTrailingFootnoteReference(std::vector<CodepointInfo>& cps) {
 
 // Asks the language hyphenator for legal break positions inside the word.
 std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps) {
-  if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
-    return {};
-  }
-
   if (const auto* hyphenator = cachedHyphenator()) {
-    auto indexes = hyphenator->breakIndexes(cps);
-    return indexes;
+    return hyphenator->breakIndexes(cps);
   }
-
   return {};
 }
 
 // Maps a codepoint index back to its byte offset inside the source word.
 size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t index) {
-  if (index >= cps.size()) {
-    return cps.empty() ? 0 : cps.back().byteOffset;
-  }
-  return cps[index].byteOffset;
+  return (index < cps.size()) ? cps[index].byteOffset : (cps.empty() ? 0 : cps.back().byteOffset);
 }
 
 // Builds a vector of break information from explicit hyphen markers in the given codepoints.
 std::vector<Hyphenator::BreakInfo> buildExplicitBreakInfos(const std::vector<CodepointInfo>& cps) {
   std::vector<Hyphenator::BreakInfo> breaks;
-  breaks.reserve(cps.size());
 
   // Scan every codepoint looking for explicit/soft hyphen markers that are surrounded by letters.
-  for (size_t i = 0; i < cps.size(); ++i) {
+  for (size_t i = 1; i + 1 < cps.size(); ++i) {
     const uint32_t cp = cps[i].value;
-    if (!isExplicitHyphen(cp) || i == 0 || i + 1 >= cps.size()) {
-      continue;  // Need at least one alphabetic character on both sides.
-    }
-    if (!isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
+    if (!isExplicitHyphen(cp) || !isAlphabetic(cps[i - 1].value) || !isAlphabetic(cps[i + 1].value)) {
       continue;
     }
     // Offset points to the next codepoint so rendering starts after the hyphen marker.
-    breaks.push_back({byteOffsetForIndex(cps, i + 1), isSoftHyphen(cp)});
+    breaks.push_back({cps[i + 1].byteOffset, isSoftHyphen(cp)});
   }
 
   if (breaks.empty()) {
     return breaks;
   }
 
-  // Sort by byte offset so we can deduplicate sequential markers.
-  // Multiple dash codepoints can point to the same byte offset once punctuation is trimmed; sort before merging.
+  // Sort by byte offset so we can deduplicate sequential markers in-place.
   std::sort(breaks.begin(), breaks.end(), [](const Hyphenator::BreakInfo& lhs, const Hyphenator::BreakInfo& rhs) {
     return lhs.byteOffset < rhs.byteOffset;
   });
 
-  // Ensure we keep a single entry per break while retaining the "needs hyphen" flag when any marker requested it.
-  std::vector<Hyphenator::BreakInfo> deduped;
-  deduped.reserve(breaks.size());
-  for (const auto& entry : breaks) {
-    if (!deduped.empty() && deduped.back().byteOffset == entry.byteOffset) {
-      // Merge entries so that an explicit hyphen wins over a soft hyphen at the same offset.
-      deduped.back().requiresInsertedHyphen = deduped.back().requiresInsertedHyphen || entry.requiresInsertedHyphen;
+  // Deduplicate in-place: merge entries at same offset while retaining "needs hyphen" flag.
+  size_t writePos = 0;
+  for (size_t readPos = 1; readPos < breaks.size(); ++readPos) {
+    if (breaks[readPos].byteOffset == breaks[writePos].byteOffset) {
+      // Merge: explicit hyphen wins over soft hyphen at same offset.
+      breaks[writePos].requiresInsertedHyphen =
+          breaks[writePos].requiresInsertedHyphen || breaks[readPos].requiresInsertedHyphen;
     } else {
-      deduped.push_back(entry);
+      breaks[++writePos] = breaks[readPos];
     }
   }
+  breaks.resize(writePos + 1);
 
-  return deduped;
+  return breaks;
 }
 
 }  // namespace
@@ -170,22 +150,25 @@ std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& w
     return explicitBreakInfos;
   }
 
-  // Ask language hyphenator for legal break points, optionally augment with naive fallback.
+  // Ask language hyphenator for legal break points.
   std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector<size_t>();
+
+  // Add naive fallback positions if requested; otherwise keep the hyphenator's indexes as returned.
   if (includeFallback) {
     for (size_t idx = MIN_PREFIX_CP; idx + MIN_SUFFIX_CP <= cps.size(); ++idx) {
       indexes.push_back(idx);
     }
+    // Fallback positions may repeat language-specific ones, so sort and deduplicate the merged list.
+    std::sort(indexes.begin(), indexes.end());
+    indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
+  } else if (indexes.empty()) {
+    return {};
   }
 
   if (indexes.empty()) {
     return {};
   }
 
-  // Sort/deduplicate break indexes before converting them back to byte offsets.
-  std::sort(indexes.begin(), indexes.end());
-  indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
-
   std::vector<Hyphenator::BreakInfo> breaks;
   breaks.reserve(indexes.size());
   for (const size_t idx : indexes) {
@@ -195,7 +178,4 @@ std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& w
   return breaks;
 }
 
-void Hyphenator::setPreferredLanguage(const std::string& lang) {
-  preferredLanguage() = lang;
-  cachedHyphenator() = hyphenatorForLanguage(lang);
-}
+void Hyphenator::setPreferredLanguage(const std::string& lang) { cachedHyphenator() = hyphenatorForLanguage(lang); }