Merge c813a2f075 into 424594488f

2025-12-19 07:37:41 +03:00 · 2025-12-18 12:03:27 +00:00 · 2025-12-18 12:03:27 +00:00 · f6f00a5e26
commit f6f00a5e26
parent 424594488f c813a2f075
10 changed files with 947 additions and 120 deletions
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@ -3,11 +3,12 @@
 #include <GfxRenderer.h>
 #include <algorithm>
-#include <cmath>
+#include <iterator>
-#include <functional>
+#include <utility>
 #include <limits>
 #include <vector>
 #include "hyphenation/Hyphenator.h"
 constexpr int MAX_COST = std::numeric_limits<int>::max();
 void ParsedText::addWord(std::string word, const EpdFontStyle fontStyle) {
@ -24,148 +25,168 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
    return;
  }
  const size_t totalWordCount = words.size();
  const int pageWidth = renderer.getScreenWidth() - horizontalMargin;
  if (pageWidth <= 0) {
    words.clear();
    wordStyles.clear();
    return;
  }
  const int spaceWidth = renderer.getSpaceWidth(fontId);
-  // width of 1em to indent first line of paragraph if Extra Spacing is enabled
+  const bool allowIndent = !extraParagraphSpacing && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN);
-  const int indentWidth = (!extraParagraphSpacing) ? 1 * renderer.getTextWidth(fontId, "m", REGULAR) : 0;
+  const int indentWidth = allowIndent ? renderer.getTextWidth(fontId, "m", REGULAR) : 0;
  const int firstLinePageWidth = allowIndent ? std::max(pageWidth - indentWidth, 0) : pageWidth;
  auto pageWidthForLine = [&](const bool isFirstLine) -> int { return isFirstLine ? firstLinePageWidth : pageWidth; };
-  std::vector<uint16_t> wordWidths;
+  auto wordIt = words.begin();
-  wordWidths.reserve(totalWordCount);
+  auto styleIt = wordStyles.begin();
  auto lineStartWordIt = wordIt;
  auto lineStartStyleIt = styleIt;
-  auto wordsIt = words.begin();
+  int lineWidthWithSpaces = 0;
-  auto wordStylesIt = wordStyles.begin();
+  int lineWordWidthSum = 0;
  size_t lineWordCount = 0;
  std::vector<uint16_t> lineWordWidths;
  lineWordWidths.reserve(16);
-  while (wordsIt != words.end()) {
+  size_t producedLines = 0;
    wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt));
    std::advance(wordsIt, 1);
    std::advance(wordStylesIt, 1);
  }
  // DP table to store the minimum badness (cost) of lines starting at index i
  std::vector<int> dp(totalWordCount);
  // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
  std::vector<size_t> ans(totalWordCount);
  // Base Case
  dp[totalWordCount - 1] = 0;
  ans[totalWordCount - 1] = totalWordCount - 1;
  for (int i = totalWordCount - 2; i >= 0; --i) {
    int currlen = -spaceWidth + indentWidth;
    dp[i] = MAX_COST;
    for (size_t j = i; j < totalWordCount; ++j) {
      // Current line length: previous width + space + current word width
      currlen += wordWidths[j] + spaceWidth;
      if (currlen > pageWidth) {
        break;
      }
      int cost;
      if (j == totalWordCount - 1) {
        cost = 0;  // Last line
      } else {
        const int remainingSpace = pageWidth - currlen;
        // Use long long for the square to prevent overflow
        const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
        if (cost_ll > MAX_COST) {
          cost = MAX_COST;
        } else {
          cost = static_cast<int>(cost_ll);
        }
      }
      if (cost < dp[i]) {
        dp[i] = cost;
        ans[i] = j;  // j is the index of the last word in this optimal line
      }
    }
  }
  // Stores the index of the word that starts the next line (last_word_index + 1)
  std::vector<size_t> lineBreakIndices;
  size_t currentWordIndex = 0;
  constexpr size_t MAX_LINES = 1000;
-  while (currentWordIndex < totalWordCount) {
+  auto commitLine = [&](const bool isLastLine) {
-    if (lineBreakIndices.size() >= MAX_LINES) {
+    if (lineWordCount == 0) {
-      break;
+      return;
    }
-    size_t nextBreakIndex = ans[currentWordIndex] + 1;
+    const bool isFirstLine = producedLines == 0;
-    lineBreakIndices.push_back(nextBreakIndex);
+    const int linePageWidth = pageWidthForLine(isFirstLine);
-    currentWordIndex = nextBreakIndex;
+    std::list<std::string> lineWords;
-  }
+    std::list<EpdFontStyle> lineStyles;
    auto wordEndIt = wordIt;
    auto styleEndIt = styleIt;
-  // Initialize iterators for consumption
+    lineWords.splice(lineWords.begin(), words, lineStartWordIt, wordEndIt);
-  auto wordStartIt = words.begin();
+    lineStyles.splice(lineStyles.begin(), wordStyles, lineStartStyleIt, styleEndIt);
  auto wordStyleStartIt = wordStyles.begin();
  size_t wordWidthIndex = 0;
-  size_t lastBreakAt = 0;
+    const int gaps = lineWordCount > 0 ? static_cast<int>(lineWordCount - 1) : 0;
-  for (const size_t lineBreak : lineBreakIndices) {
+    const int baseSpaceTotal = spaceWidth * gaps;
-    const size_t lineWordCount = lineBreak - lastBreakAt;
+    const int spaceBudget = linePageWidth - lineWordWidthSum;
    // Calculate end iterators for the range to splice
    auto wordEndIt = wordStartIt;
    auto wordStyleEndIt = wordStyleStartIt;
    std::advance(wordEndIt, lineWordCount);
    std::advance(wordStyleEndIt, lineWordCount);
    // Calculate total word width for this line
    int lineWordWidthSum = 0;
    for (size_t i = 0; i < lineWordCount; ++i) {
      lineWordWidthSum += wordWidths[wordWidthIndex + i];
    }
    // Calculate spacing
    int spareSpace = pageWidth - lineWordWidthSum;
    if (wordWidthIndex == 0) {
      spareSpace -= indentWidth;
    }
    int spacing = spaceWidth;
-    const bool isLastLine = lineBreak == totalWordCount;
+    int spacingRemainder = 0;
-
+    if (style == TextBlock::JUSTIFIED && !isLastLine && gaps > 0) {
-    if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
+      const int additional = std::max(0, spaceBudget - baseSpaceTotal);
-      spacing = spareSpace / (lineWordCount - 1);
+      spacing = spaceWidth + (gaps > 0 ? additional / gaps : 0);
      spacingRemainder = (gaps > 0) ? additional % gaps : 0;
    }
-    // Calculate initial x position
+    int renderedWidth = lineWordWidthSum;
-    uint16_t xpos = (wordWidthIndex == 0) ? indentWidth : 0;
+    if (gaps > 0) {
      renderedWidth += spacing * gaps;
    }
    uint16_t xpos = 0;
    if (style == TextBlock::RIGHT_ALIGN) {
-      xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
+      xpos = renderedWidth < linePageWidth ? linePageWidth - renderedWidth : 0;
    } else if (style == TextBlock::CENTER_ALIGN) {
-      xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
+      xpos = renderedWidth < linePageWidth ? (linePageWidth - renderedWidth) / 2 : 0;
    } else if (allowIndent && isFirstLine) {
      xpos = indentWidth;
    }
    // Pre-calculate X positions for words
    std::list<uint16_t> lineXPos;
-    for (size_t i = 0; i < lineWordCount; ++i) {
+    for (size_t idx = 0; idx < lineWordWidths.size(); ++idx) {
      const uint16_t currentWordWidth = wordWidths[wordWidthIndex + i];
      lineXPos.push_back(xpos);
-      xpos += currentWordWidth + spacing;
+      xpos += lineWordWidths[idx];
      if (idx + 1 < lineWordWidths.size()) {
        int gap = spacing;
        if (spacingRemainder > 0) {
          gap += 1;
          spacingRemainder--;
        }
        xpos += gap;
      }
    }
-    // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
+    processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineStyles), style));
    std::list<std::string> lineWords;
    lineWords.splice(lineWords.begin(), words, wordStartIt, wordEndIt);
    std::list<EpdFontStyle> lineWordStyles;
    lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyleStartIt, wordStyleEndIt);
-    processLine(
+    producedLines++;
-        std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
+    lineWordWidths.clear();
    lineWordWidthSum = 0;
    lineWidthWithSpaces = 0;
    lineWordCount = 0;
    lineStartWordIt = wordIt;
    lineStartStyleIt = styleIt;
  };
-    // Update pointers/indices for the next line
+  while (wordIt != words.end() && producedLines < MAX_LINES) {
-    wordStartIt = wordEndIt;
+    const int currentLinePageWidth = pageWidthForLine(producedLines == 0);
-    wordStyleStartIt = wordStyleEndIt;
+
-    wordWidthIndex += lineWordCount;
+    if (lineWordCount == 0) {
-    lastBreakAt = lineBreak;
+      lineStartWordIt = wordIt;
      lineStartStyleIt = styleIt;
    }
    const int wordWidth = renderer.getTextWidth(fontId, wordIt->c_str(), *styleIt);
    const int gapWidth = (lineWordCount > 0) ? spaceWidth : 0;
    const int candidateWidth = lineWidthWithSpaces + gapWidth + wordWidth;
    if (candidateWidth <= currentLinePageWidth) {
      lineWordWidths.push_back(static_cast<uint16_t>(wordWidth));
      lineWordWidthSum += wordWidth;
      lineWidthWithSpaces = candidateWidth;
      lineWordCount++;
      ++wordIt;
      ++styleIt;
      continue;
    }
    const int availableWidth = currentLinePageWidth - lineWidthWithSpaces - gapWidth;
    if (lineWordCount > 0 && availableWidth <= 0) {
      commitLine(false);
      continue;
    }
    if (lineWordCount > 0 && availableWidth > 0) {
      HyphenationResult split;
      if (Hyphenator::splitWord(renderer, fontId, *wordIt, *styleIt, availableWidth, &split, false)) {
        *wordIt = std::move(split.head);
        auto nextWordIt = std::next(wordIt);
        auto nextStyleIt = std::next(styleIt);
        words.insert(nextWordIt, std::move(split.tail));
        wordStyles.insert(nextStyleIt, *styleIt);
        continue;
      }
    }
    if (lineWordCount == 0) {
      HyphenationResult split;
      if (Hyphenator::splitWord(renderer, fontId, *wordIt, *styleIt, currentLinePageWidth, &split, true)) {
        *wordIt = std::move(split.head);
        auto nextWordIt = std::next(wordIt);
        auto nextStyleIt = std::next(styleIt);
        words.insert(nextWordIt, std::move(split.tail));
        wordStyles.insert(nextStyleIt, *styleIt);
        continue;
      }
      lineWordWidths.push_back(static_cast<uint16_t>(wordWidth));
      lineWordWidthSum += wordWidth;
      lineWidthWithSpaces = candidateWidth;
      lineWordCount = 1;
      ++wordIt;
      ++styleIt;
      commitLine(wordIt == words.end());
      continue;
    }
    commitLine(false);
  }
  if (lineWordCount > 0 && producedLines < MAX_LINES) {
    commitLine(true);
  }
  words.clear();
  wordStyles.clear();
 }
--- a/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/EnglishHyphenator.cpp
@ -0,0 +1,278 @@
 #include "EnglishHyphenator.h"
 #include <algorithm>
 #include <initializer_list>
 #include <vector>
 namespace {
 // Folds a code point to a lowercase ASCII letter.
 // Returns NUL (0) when isLatinLetter() rejects the code point, so callers can
 // use the result as a "is this a Latin letter" flag as well.
 char lowerLatinChar(const uint32_t cp) {
  return isLatinLetter(cp) ? static_cast<char>(toLowerLatin(cp)) : '\0';
 }
 // True for the letters this module treats as approximants: l, r, w and y.
 // Expects an already lower-cased character.
 bool isEnglishApproximantChar(const char c) {
  switch (c) {
    case 'l':
    case 'r':
    case 'w':
    case 'y':
      return true;
    default:
      return false;
  }
 }
 // True for the letters this module treats as stops: p, b, t, d, k, g, c and q.
 // Expects an already lower-cased character.
 bool isEnglishStopChar(const char c) {
  return c == 'p' || c == 'b' || c == 't' || c == 'd' || c == 'k' || c == 'g' || c == 'c' || c == 'q';
 }
 // True for the letters this module treats as fricatives: f, v, s, z, h and x.
 // Expects an already lower-cased character.
 bool isEnglishFricativeChar(const char c) {
  return c == 'f' || c == 'v' || c == 's' || c == 'z' || c == 'h' || c == 'x';
 }
 // Ordered pair of lower-case letters used to spell out two-letter sequences.
 struct CharPair {
  char first;
  char second;
 };
 // Reports whether (first, second) appears, in that order, among `pairs`.
 bool matchesDigraph(const char first, const char second, const std::initializer_list<CharPair>& pairs) {
  return std::any_of(pairs.begin(), pairs.end(), [first, second](const CharPair& candidate) {
    return candidate.first == first && candidate.second == second;
  });
 }
 // Decides whether two adjacent vowel letters should be kept together (treated as
 // one vowel group) rather than split. Both code points must be Latin letters;
 // comparison is case-insensitive.
 bool isEnglishDiphthong(const uint32_t first, const uint32_t second) {
  if (!(isLatinLetter(first) && isLatinLetter(second))) {
    return false;
  }
  const char lead = static_cast<char>(toLowerLatin(first));
  const char trail = static_cast<char>(toLowerLatin(second));
  if (lead == 'a') {
    return trail == 'i' || trail == 'y' || trail == 'u';
  }
  // 'e' and 'o' accept the same set of following vowels.
  if (lead == 'e' || lead == 'o') {
    return trail == 'a' || trail == 'e' || trail == 'i' || trail == 'o' || trail == 'u' || trail == 'y';
  }
  if (lead == 'i') {
    return trail == 'e' || trail == 'u' || trail == 'a';
  }
  if (lead == 'u') {
    return trail == 'i' || trail == 'a' || trail == 'e';
  }
  return false;
 }
 // Reports whether a two-letter consonant sequence is accepted as the start of a
 // syllable. Non-Latin input is rejected; comparison is case-insensitive.
 bool isValidEnglishOnsetBigram(const uint32_t firstCp, const uint32_t secondCp) {
  const char lead = lowerLatinChar(firstCp);
  const char follow = lowerLatinChar(secondCp);
  if (lead == 0 || follow == 0) {
    return false;
  }
  // Fixed digraphs that are always allowed.
  const bool fixedDigraph = matchesDigraph(lead, follow,
                                           {{'c', 'h'},
                                            {'s', 'h'},
                                            {'t', 'h'},
                                            {'p', 'h'},
                                            {'w', 'h'},
                                            {'w', 'r'},
                                            {'k', 'n'},
                                            {'g', 'n'},
                                            {'p', 's'},
                                            {'p', 't'},
                                            {'p', 'n'},
                                            {'r', 'h'}});
  if (fixedDigraph) {
    return true;
  }
  // Stop or fricative followed by an approximant (l, r, w, y).
  if ((isEnglishStopChar(lead) || isEnglishFricativeChar(lead)) && isEnglishApproximantChar(follow)) {
    return true;
  }
  // 's' followed by one of a fixed set of consonants.
  if (lead == 's') {
    switch (follow) {
      case 'p':
      case 't':
      case 'k':
      case 'm':
      case 'n':
      case 'f':
      case 'l':
      case 'w':
      case 'c':
        return true;
      default:
        break;
    }
  }
  // Consonant followed by 'y'.
  if (follow == 'y') {
    switch (lead) {
      case 'p':
      case 'b':
      case 't':
      case 'd':
      case 'f':
      case 'k':
      case 'g':
      case 'h':
      case 'm':
      case 'n':
      case 'l':
      case 's':
        return true;
      default:
        break;
    }
  }
  return false;
 }
 // Reports whether a three-letter consonant sequence is accepted as the start of
 // a syllable. Only "thr" and a fixed family of s-clusters qualify. Non-Latin
 // input is rejected; comparison is case-insensitive.
 bool isValidEnglishOnsetTrigram(const uint32_t firstCp, const uint32_t secondCp, const uint32_t thirdCp) {
  const char lead = lowerLatinChar(firstCp);
  const char middle = lowerLatinChar(secondCp);
  const char last = lowerLatinChar(thirdCp);
  if (lead == 0 || middle == 0 || last == 0) {
    return false;
  }
  if (lead == 't') {
    return middle == 'h' && last == 'r';
  }
  if (lead != 's') {
    return false;
  }
  switch (middle) {
    case 'p':
    case 'k':
      return last == 'l' || last == 'r' || last == 'w';
    case 't':
      return last == 'r' || last == 'w' || last == 'y';
    case 'c':
      return last == 'l' || last == 'r';
    case 'f':
    case 'h':
      return last == 'r';
    default:
      return false;
  }
 }
 // Checks whether cps[start, end) can begin a syllable: every element must be a
 // Latin consonant (or 'y'), a single letter always qualifies, and two- or
 // three-letter runs must pass the bigram/trigram tables. Longer runs never do.
 bool englishClusterIsValidOnset(const std::vector<CodepointInfo>& cps, const size_t start, const size_t end) {
  if (start >= end) {
    return false;
  }
  for (size_t pos = start; pos < end; ++pos) {
    const uint32_t value = cps[pos].value;
    const char letter = lowerLatinChar(value);
    // 'y' is classified as a vowel by isLatinVowel(), so it is allowed explicitly.
    if (letter == 0 || (letter != 'y' && !isLatinConsonant(value))) {
      return false;
    }
  }
  switch (end - start) {
    case 1:
      return true;
    case 2:
      return isValidEnglishOnsetBigram(cps[start].value, cps[start + 1].value);
    case 3:
      return isValidEnglishOnsetTrigram(cps[start].value, cps[start + 1].value, cps[start + 2].value);
    default:
      return false;
  }
 }
 // Returns how many trailing code points of the consonant cluster
 // cps[clusterStart, clusterEnd) belong to the following syllable. The longest
 // valid suffix (at most 3) wins; an empty cluster yields 0 and a cluster with
 // no valid suffix yields 1.
 size_t englishOnsetLength(const std::vector<CodepointInfo>& cps, const size_t clusterStart, const size_t clusterEnd) {
  const size_t clusterLen = clusterEnd - clusterStart;
  if (clusterLen == 0) {
    return 0;
  }
  size_t candidate = std::min<size_t>(3, clusterLen);
  while (candidate > 0) {
    if (englishClusterIsValidOnset(cps, clusterEnd - candidate, clusterEnd)) {
      return candidate;
    }
    --candidate;
  }
  // Nothing qualified: move a single code point to the next syllable.
  return 1;
 }
 // True when a break placed before cps[index] would touch an ASCII apostrophe on
 // either side. Indexes at the very start or past the end report false.
 bool nextToApostrophe(const std::vector<CodepointInfo>& cps, const size_t index) {
  const bool hasBothNeighbours = index > 0 && index < cps.size();
  return hasBothNeighbours && (cps[index - 1].value == '\'' || cps[index].value == '\'');
 }
 // Computes candidate hyphenation points for a Latin-script word.
 // Each returned value is a code-point index at which the tail would start.
 // Candidates lie between consecutive vowels, keep at least MIN_PREFIX_CP code
 // points before and MIN_SUFFIX_CP after the break, and never touch an
 // apostrophe. The result is sorted and free of duplicates.
 std::vector<size_t> englishBreakIndexes(const std::vector<CodepointInfo>& cps) {
  std::vector<size_t> breaks;
  const size_t total = cps.size();
  if (total < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
    return breaks;
  }
  std::vector<size_t> vowels;
  vowels.reserve(total);
  for (size_t pos = 0; pos < total; ++pos) {
    if (isLatinVowel(cps[pos].value)) {
      vowels.push_back(pos);
    }
  }
  if (vowels.size() < 2) {
    return breaks;
  }
  const auto respectsAffixLimits = [total](const size_t at) {
    return at >= MIN_PREFIX_CP && total - at >= MIN_SUFFIX_CP;
  };
  for (size_t k = 1; k < vowels.size(); ++k) {
    const size_t prevVowel = vowels[k - 1];
    const size_t nextVowel = vowels[k];
    size_t candidate = 0;
    if (nextVowel - prevVowel == 1) {
      // Adjacent vowels: split between them unless they form a diphthong.
      if (isEnglishDiphthong(cps[prevVowel].value, cps[nextVowel].value)) {
        continue;
      }
      candidate = nextVowel;
    } else {
      // Consonant cluster between the vowels: its onset part moves to the next syllable.
      candidate = nextVowel - englishOnsetLength(cps, prevVowel + 1, nextVowel);
    }
    if (respectsAffixLimits(candidate) && !nextToApostrophe(cps, candidate)) {
      breaks.push_back(candidate);
    }
  }
  std::sort(breaks.begin(), breaks.end());
  breaks.erase(std::unique(breaks.begin(), breaks.end()), breaks.end());
  return breaks;
 }
 }  // namespace
 // Shared instance, created on first use as a function-local static.
 const EnglishHyphenator& EnglishHyphenator::instance() {
  static EnglishHyphenator singleton;
  return singleton;
 }
 // This hyphenator is selected for words detected as Script::Latin.
 Script EnglishHyphenator::script() const {
  return Script::Latin;
 }
 // Forwards to the file-local English break-point algorithm.
 std::vector<size_t> EnglishHyphenator::breakIndexes(const std::vector<CodepointInfo>& cps) const {
  return englishBreakIndexes(cps);
 }
--- a/lib/Epub/Epub/hyphenation/EnglishHyphenator.h
+++ b/lib/Epub/Epub/hyphenation/EnglishHyphenator.h
@ -0,0 +1,14 @@
 #pragma once
 #include "LanguageHyphenator.h"
 // LanguageHyphenator implementation for Latin-script (English) words.
 // Only reachable through instance(); the constructor is private.
 class EnglishHyphenator final : public LanguageHyphenator {
 public:
  // Returns the single shared instance.
  static const EnglishHyphenator& instance();
  // Script this hyphenator handles.
  Script script() const override;
  // Candidate break positions, expressed as code-point indexes into `cps`.
  std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const override;
 private:
  // Private so that instance() is the only way to obtain an object.
  EnglishHyphenator() = default;
 };
--- a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
+++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
@ -0,0 +1,82 @@
 #include "HyphenationCommon.h"
 namespace {
 // ASCII-only case folding: 'A'..'Z' map to 'a'..'z', everything else is unchanged.
 uint32_t toLowerLatinImpl(const uint32_t cp) {
  const bool isUpperAscii = cp >= 'A' && cp <= 'Z';
  return isUpperAscii ? cp + ('a' - 'A') : cp;
 }
 // Case folding for U+0410..U+042F (shifted by 0x20) and U+0401 (mapped to
 // U+0451). Every other value is returned unchanged.
 uint32_t toLowerCyrillicImpl(const uint32_t cp) {
  if (cp == 0x0401) {
    return 0x0451;
  }
  const bool isUpperBasic = cp >= 0x0410 && cp <= 0x042F;
  return isUpperBasic ? cp + 0x20 : cp;
 }
 }  // namespace
 // Public wrapper around the ASCII case folding above.
 uint32_t toLowerLatin(const uint32_t cp) {
  return toLowerLatinImpl(cp);
 }
 // Public wrapper around the Cyrillic case folding above.
 uint32_t toLowerCyrillic(const uint32_t cp) {
  return toLowerCyrillicImpl(cp);
 }
 // True for ASCII letters only (A-Z, a-z).
 bool isLatinLetter(const uint32_t cp) {
  const uint32_t lower = toLowerLatinImpl(cp);
  return lower >= 'a' && lower <= 'z';
 }
 // True for a, e, i, o, u and y in either case.
 bool isLatinVowel(uint32_t cp) {
  switch (toLowerLatinImpl(cp)) {
    case 'a':
    case 'e':
    case 'i':
    case 'o':
    case 'u':
    case 'y':
      return true;
    default:
      return false;
  }
 }
 // An ASCII letter that isLatinVowel() does not accept.
 bool isLatinConsonant(const uint32_t cp) {
  if (!isLatinLetter(cp)) {
    return false;
  }
  return !isLatinVowel(cp);
 }
 // True for any code point in U+0400..U+052F.
 bool isCyrillicLetter(const uint32_t cp) {
  return !(cp < 0x0400 || cp > 0x052F);
 }
 // True for the ten vowels listed below, in either case.
 bool isCyrillicVowel(uint32_t cp) {
  static constexpr uint32_t kLowerVowels[] = {
      0x0430,  // а
      0x0435,  // е
      0x0451,  // ё
      0x0438,  // и
      0x043E,  // о
      0x0443,  // у
      0x044B,  // ы
      0x044D,  // э
      0x044E,  // ю
      0x044F,  // я
  };
  const uint32_t lower = toLowerCyrillicImpl(cp);
  for (const uint32_t vowel : kLowerVowels) {
    if (vowel == lower) {
      return true;
    }
  }
  return false;
 }
 // A code point in the Cyrillic range that isCyrillicVowel() does not accept.
 bool isCyrillicConsonant(const uint32_t cp) {
  if (!isCyrillicLetter(cp)) {
    return false;
  }
  return !isCyrillicVowel(cp);
 }
 // Letter of either supported script.
 bool isAlphabetic(const uint32_t cp) {
  if (isLatinLetter(cp)) {
    return true;
  }
  return isCyrillicLetter(cp);
 }
 // Vowel of either supported script.
 bool isVowel(const uint32_t cp) {
  if (isLatinVowel(cp)) {
    return true;
  }
  return isCyrillicVowel(cp);
 }
 // Classifies a word by the letters it contains: Latin when only Latin letters
 // occur, Cyrillic when only Cyrillic letters occur, and Mixed otherwise
 // (both kinds present, or no letters of either kind).
 Script detectScript(const std::vector<CodepointInfo>& cps) {
  bool sawLatin = false;
  bool sawCyrillic = false;
  for (const CodepointInfo& entry : cps) {
    const uint32_t value = entry.value;
    if (isLatinLetter(value)) {
      sawLatin = true;
    } else if (isCyrillicLetter(value)) {
      sawCyrillic = true;
    }
  }
  if (sawLatin == sawCyrillic) {
    return Script::Mixed;
  }
  return sawLatin ? Script::Latin : Script::Cyrillic;
 }
--- a/lib/Epub/Epub/hyphenation/HyphenationCommon.h
+++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.h
@ -0,0 +1,31 @@
 #pragma once
 #include <cstddef>
 #include <cstdint>
 #include <vector>
 // One decoded code point of a word together with where it starts in the
 // original byte string.
 struct CodepointInfo {
  uint32_t value;     // the code point value
  size_t byteOffset;  // byte position of the code point's first byte within the word
 };
 // Script classification of a word; Mixed is what detectScript() reports when a
 // word is neither purely Latin nor purely Cyrillic.
 enum class Script { Latin, Cyrillic, Mixed };
 // Minimum number of code points that must precede a hyphenation break.
 constexpr size_t MIN_PREFIX_CP = 3;
 // Minimum number of code points that must follow a hyphenation break.
 constexpr size_t MIN_SUFFIX_CP = 2;
 // Lower-cases ASCII 'A'..'Z'; other values are returned unchanged.
 uint32_t toLowerLatin(uint32_t cp);
 // Lower-cases U+0410..U+042F and U+0401; other values are returned unchanged.
 uint32_t toLowerCyrillic(uint32_t cp);
 // True for ASCII letters A-Z and a-z.
 bool isLatinLetter(uint32_t cp);
 // True for a, e, i, o, u, y (case-insensitive).
 bool isLatinVowel(uint32_t cp);
 // True for ASCII letters that are not vowels per isLatinVowel().
 bool isLatinConsonant(uint32_t cp);
 // True for code points in U+0400..U+052F.
 bool isCyrillicLetter(uint32_t cp);
 // True for а, е, ё, и, о, у, ы, э, ю, я (case-insensitive).
 bool isCyrillicVowel(uint32_t cp);
 // True for Cyrillic-range code points that are not vowels per isCyrillicVowel().
 bool isCyrillicConsonant(uint32_t cp);
 // True for a Latin or Cyrillic letter.
 bool isAlphabetic(uint32_t cp);
 // True for a Latin or Cyrillic vowel.
 bool isVowel(uint32_t cp);
 // Classifies a word as Latin, Cyrillic or Mixed from the letters it contains.
 Script detectScript(const std::vector<CodepointInfo>& cps);
--- a/lib/Epub/Epub/hyphenation/Hyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/Hyphenator.cpp
@ -0,0 +1,158 @@
 #include "Hyphenator.h"
 #include <GfxRenderer.h>
 #include <Utf8.h>
 #include <algorithm>
 #include <array>
 #include <limits>
 #include <vector>
 #include "EnglishHyphenator.h"
 #include "HyphenationCommon.h"
 #include "LanguageHyphenator.h"
 #include "RussianHyphenator.h"
 namespace {
 const std::array<const LanguageHyphenator*, 2>& registeredHyphenators() {
  static const std::array<const LanguageHyphenator*, 2> hyphenators = {
      &EnglishHyphenator::instance(),
      &RussianHyphenator::instance(),
  };
  return hyphenators;
 }
 // Finds the first registered hyphenator that handles `script`;
 // returns nullptr when none does (e.g. for Script::Mixed).
 const LanguageHyphenator* hyphenatorForScript(const Script script) {
  const auto& registry = registeredHyphenators();
  const auto match = std::find_if(registry.begin(), registry.end(), [script](const LanguageHyphenator* candidate) {
    return candidate->script() == script;
  });
  return match != registry.end() ? *match : nullptr;
 }
 std::vector<CodepointInfo> collectCodepoints(const std::string& word) {
  std::vector<CodepointInfo> cps;
  cps.reserve(word.size());
  const unsigned char* base = reinterpret_cast<const unsigned char*>(word.c_str());
  const unsigned char* ptr = base;
  while (*ptr != 0) {
    const unsigned char* current = ptr;
    const uint32_t cp = utf8NextCodepoint(&ptr);
    cps.push_back({cp, static_cast<size_t>(current - base)});
  }
  return cps;
 }
 bool hasOnlyAlphabetic(const std::vector<CodepointInfo>& cps) {
  if (cps.empty()) {
    return false;
  }
  for (const auto& info : cps) {
    if (!isAlphabetic(info.value)) {
      return false;
    }
  }
  return true;
 }
 std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps) {
  if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
    return {};
  }
  const Script script = detectScript(cps);
  if (const auto* hyphenator = hyphenatorForScript(script)) {
    auto indexes = hyphenator->breakIndexes(cps);
    return indexes;
  }
  return {};
 }
 // Translates a code-point index into the byte offset where that code point
 // starts. An out-of-range index falls back to the offset of the last code
 // point, or 0 when the list is empty.
 size_t byteOffsetForIndex(const std::vector<CodepointInfo>& cps, const size_t index) {
  if (index < cps.size()) {
    return cps[index].byteOffset;
  }
  return cps.empty() ? 0 : cps.back().byteOffset;
 }
 // Returns the bytes of `word` in [startByte, endByte), clamping endByte to the
 // string length. An empty or out-of-range interval yields an empty string.
 std::string slice(const std::string& word, const size_t startByte, const size_t endByte) {
  const bool emptyRange = startByte >= endByte || startByte >= word.size();
  if (emptyRange) {
    return {};
  }
  const size_t stop = endByte < word.size() ? endByte : word.size();
  return std::string(word, startByte, stop - startByte);
 }
 }  // namespace
 // Splits `word` into a hyphen-terminated head and a tail so that the head fits
 // into `availableWidth` pixels when drawn with the given font and style.
 //
 // renderer / fontId / style - used to measure text widths.
 // word           - UTF-8 word to split; must hold at least
 //                  MIN_PREFIX_CP + MIN_SUFFIX_CP code points.
 // availableWidth - width the head (including the trailing "-") may occupy.
 // result         - receives head (with "-" appended) and tail on success;
 //                  left untouched on failure. Must not be null.
 // force          - when false, only purely alphabetic words are split and only
 //                  at language-specific break points. When true, any word is
 //                  accepted and, if no language break point fits, every
 //                  code-point boundary that honours MIN_PREFIX_CP /
 //                  MIN_SUFFIX_CP is tried as well.
 //
 // Returns true when a split was produced.
 bool Hyphenator::splitWord(const GfxRenderer& renderer, const int fontId, const std::string& word,
                           const EpdFontStyle style, const int availableWidth, HyphenationResult* result,
                           const bool force) {
  if (!result || word.empty()) {
    return false;
  }
  const auto cps = collectCodepoints(word);
  if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
    return false;
  }
  if (!force && !hasOnlyAlphabetic(cps)) {
    return false;
  }
  const auto breakIndexes = collectBreakIndexes(cps);
  // Reserve room for the hyphen that gets appended to the head.
  const int hyphenWidth = renderer.getTextWidth(fontId, "-", style);
  const int adjustedWidth = availableWidth - hyphenWidth;
  constexpr size_t NO_BREAK = std::numeric_limits<size_t>::max();
  size_t chosenIndex = NO_BREAK;
  // Measures the prefix ending just before code point `idx`.
  const auto prefixFits = [&](const size_t idx) {
    const std::string prefix = word.substr(0, byteOffsetForIndex(cps, idx));
    return renderer.getTextWidth(fontId, prefix.c_str(), style) <= adjustedWidth;
  };
  if (adjustedWidth > 0) {
    // Break indexes are ascending, so keep the last one that still fits.
    for (const size_t idx : breakIndexes) {
      if (!prefixFits(idx)) {
        break;
      }
      chosenIndex = idx;
    }
  }
  if (chosenIndex == NO_BREAK && force) {
    // Forced fallback: walk every admissible code-point boundary.
    // With a positive budget, stop at the first prefix that no longer fits
    // (prefixes only grow, so later ones are not expected to fit either).
    // With no budget at all, width is ignored and the longest admissible
    // prefix is taken, as before.
    for (size_t idx = MIN_PREFIX_CP; idx + MIN_SUFFIX_CP <= cps.size(); ++idx) {
      if (adjustedWidth > 0 && !prefixFits(idx)) {
        break;
      }
      chosenIndex = idx;
    }
  }
  if (chosenIndex == NO_BREAK) {
    return false;
  }
  const size_t splitByte = byteOffsetForIndex(cps, chosenIndex);
  const std::string head = word.substr(0, splitByte);
  const std::string tail = slice(word, splitByte, word.size());
  if (head.empty() || tail.empty()) {
    return false;
  }
  result->head = head + "-";
  result->tail = tail;
  return true;
 }
--- a/lib/Epub/Epub/hyphenation/Hyphenator.h
+++ b/lib/Epub/Epub/hyphenation/Hyphenator.h
@ -0,0 +1,18 @@
 #pragma once
 #include <EpdFontFamily.h>
 #include <string>
 class GfxRenderer;
 // Output of Hyphenator::splitWord(): the two pieces a word was split into.
 struct HyphenationResult {
  std::string head;  // leading part, with a trailing "-" appended
  std::string tail;  // remaining part of the word
 };
 // Entry point for word hyphenation.
 class Hyphenator {
 public:
  // Tries to split `word` so that the head (including its hyphen) fits into
  // `availableWidth` when measured with `renderer`, `fontId` and `style`.
  // On success fills `result` and returns true; otherwise returns false.
  // `force` relaxes the rules: non-alphabetic words are accepted and arbitrary
  // code-point boundaries may be used when no language break point fits.
  static bool splitWord(const GfxRenderer& renderer, int fontId, const std::string& word, EpdFontStyle style,
                        int availableWidth, HyphenationResult* result, bool force);
 };
--- a/lib/Epub/Epub/hyphenation/LanguageHyphenator.h
+++ b/lib/Epub/Epub/hyphenation/LanguageHyphenator.h
@ -0,0 +1,12 @@
 #pragma once
 #include <vector>
 #include "HyphenationCommon.h"
 // Interface implemented by each language-specific hyphenation algorithm.
 class LanguageHyphenator {
 public:
  virtual ~LanguageHyphenator() = default;
  // Script whose words this hyphenator handles.
  virtual Script script() const = 0;
  // Returns candidate break positions as code-point indexes into `cps`.
  virtual std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const = 0;
 };
--- a/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp
+++ b/lib/Epub/Epub/hyphenation/RussianHyphenator.cpp
@ -0,0 +1,199 @@
 #include "RussianHyphenator.h"
 #include <algorithm>
 #include <vector>
 namespace {
 // True for the soft sign (U+042C / U+044C) and hard sign (U+042A / U+044A).
 bool isSoftOrHardSign(const uint32_t cp) {
  switch (cp) {
    case 0x042A:  // Ъ
    case 0x044A:  // ъ
    case 0x042C:  // Ь
    case 0x044C:  // ь
      return true;
    default:
      return false;
  }
 }
 // True for в, з and с (case-insensitive), the consonants granted the
 // "prefix allowance" at the start of a cluster.
 bool isRussianPrefixConsonant(uint32_t cp) {
  switch (toLowerCyrillic(cp)) {
    case 0x0432:  // в
    case 0x0437:  // з
    case 0x0441:  // с
      return true;
    default:
      return false;
  }
 }
 // True for the letters this module treats as sibilants (case-insensitive).
 bool isRussianSibilant(uint32_t cp) {
  static constexpr uint32_t kSibilants[] = {
      0x0437,  // з
      0x0441,  // с
      0x0436,  // ж
      0x0448,  // ш
      0x0449,  // щ
      0x0447,  // ч
      0x0446,  // ц
  };
  const uint32_t lower = toLowerCyrillic(cp);
  for (const uint32_t sibilant : kSibilants) {
    if (sibilant == lower) {
      return true;
    }
  }
  return false;
 }
 // True for the letters this module treats as stops (case-insensitive).
 bool isRussianStop(uint32_t cp) {
  static constexpr uint32_t kStops[] = {
      0x0431,  // б
      0x0433,  // г
      0x0434,  // д
      0x043F,  // п
      0x0442,  // т
      0x043A,  // к
  };
  const uint32_t lower = toLowerCyrillic(cp);
  for (const uint32_t stop : kStops) {
    if (stop == lower) {
      return true;
    }
  }
  return false;
 }
 // Ranks a consonant on the sonority scale used for onset validation:
 // 4 for л/р/й, 3 for м/н, 2 for в/з/ж, 0 for the stops б/г/д/п/т/к and
 // 1 for everything else (ф, с, ш, щ, ч, ц, х and any unlisted code point).
 int russianSonority(uint32_t cp) {
  const uint32_t lower = toLowerCyrillic(cp);
  if (lower == 0x043B || lower == 0x0440 || lower == 0x0439) {  // л, р, й
    return 4;
  }
  if (lower == 0x043C || lower == 0x043D) {  // м, н
    return 3;
  }
  if (lower == 0x0432 || lower == 0x0437 || lower == 0x0436) {  // в, з, ж
    return 2;
  }
  if (lower == 0x0431 || lower == 0x0433 || lower == 0x0434 ||  // б, г, д
      lower == 0x043F || lower == 0x0442 || lower == 0x043A) {  // п, т, к
    return 0;
  }
  return 1;
 }
 // Checks whether cps[start, end) can begin a syllable. Every element must be a
 // Cyrillic consonant other than the soft/hard sign, and sonority must not drop
 // from one consonant to the next. A drop is tolerated for a prefix consonant
 // (в, з, с) in the first position, or for a sibilant followed by a stop.
 bool russianClusterIsValidOnset(const std::vector<CodepointInfo>& cps, const size_t start, const size_t end) {
  if (start >= end) {
    return false;
  }
  for (size_t pos = start; pos < end; ++pos) {
    const uint32_t value = cps[pos].value;
    if (isSoftOrHardSign(value) || !isCyrillicConsonant(value)) {
      return false;
    }
  }
  // A single consonant never enters this loop and is therefore always valid.
  for (size_t pos = start + 1; pos < end; ++pos) {
    const uint32_t previous = cps[pos - 1].value;
    const uint32_t current = cps[pos].value;
    if (russianSonority(previous) <= russianSonority(current)) {
      continue;
    }
    const bool prefixAllowance = (pos - 1 == start) && isRussianPrefixConsonant(previous);
    const bool sibilantAllowance = isRussianSibilant(previous) && isRussianStop(current);
    if (!(prefixAllowance || sibilantAllowance)) {
      return false;
    }
  }
  return true;
 }
 // Returns how many trailing code points of the consonant cluster
 // cps[clusterStart, clusterEnd) belong to the following syllable. The longest
 // valid suffix (at most 4) wins; an empty cluster yields 0 and a cluster with
 // no valid suffix yields 1.
 size_t russianOnsetLength(const std::vector<CodepointInfo>& cps, const size_t clusterStart, const size_t clusterEnd) {
  const size_t clusterLen = clusterEnd - clusterStart;
  if (clusterLen == 0) {
    return 0;
  }
  size_t candidate = std::min<size_t>(4, clusterLen);
  while (candidate > 0) {
    if (russianClusterIsValidOnset(cps, clusterEnd - candidate, clusterEnd)) {
      return candidate;
    }
    --candidate;
  }
  // Nothing qualified: move a single code point to the next syllable.
  return 1;
 }
 // True when a break placed before cps[index] would touch a soft or hard sign on
 // either side. Indexes at the very start or past the end report false.
 bool nextToSoftSign(const std::vector<CodepointInfo>& cps, const size_t index) {
  const bool hasBothNeighbours = index > 0 && index < cps.size();
  return hasBothNeighbours && (isSoftOrHardSign(cps[index - 1].value) || isSoftOrHardSign(cps[index].value));
 }
 // Computes candidate hyphenation points for a Cyrillic-script word.
 // Each returned value is a code-point index at which the tail would start.
 // Candidates lie between consecutive vowels, keep at least MIN_PREFIX_CP code
 // points before and MIN_SUFFIX_CP after the break, and never touch a soft or
 // hard sign. The result is sorted and free of duplicates.
 std::vector<size_t> russianBreakIndexes(const std::vector<CodepointInfo>& cps) {
  std::vector<size_t> breaks;
  const size_t total = cps.size();
  if (total < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
    return breaks;
  }
  std::vector<size_t> vowels;
  vowels.reserve(total);
  for (size_t pos = 0; pos < total; ++pos) {
    if (isCyrillicVowel(cps[pos].value)) {
      vowels.push_back(pos);
    }
  }
  if (vowels.size() < 2) {
    return breaks;
  }
  for (size_t k = 1; k < vowels.size(); ++k) {
    const size_t prevVowel = vowels[k - 1];
    const size_t nextVowel = vowels[k];
    // Adjacent vowels split right between them; otherwise the onset part of the
    // consonant cluster moves to the next syllable.
    const size_t candidate =
        (nextVowel - prevVowel == 1) ? nextVowel : nextVowel - russianOnsetLength(cps, prevVowel + 1, nextVowel);
    const bool respectsAffixLimits = candidate >= MIN_PREFIX_CP && total - candidate >= MIN_SUFFIX_CP;
    if (respectsAffixLimits && !nextToSoftSign(cps, candidate)) {
      breaks.push_back(candidate);
    }
  }
  std::sort(breaks.begin(), breaks.end());
  breaks.erase(std::unique(breaks.begin(), breaks.end()), breaks.end());
  return breaks;
 }
 }  // namespace
 // Shared instance, created on first use as a function-local static.
 const RussianHyphenator& RussianHyphenator::instance() {
  static RussianHyphenator singleton;
  return singleton;
 }
 // This hyphenator is selected for words detected as Script::Cyrillic.
 Script RussianHyphenator::script() const {
  return Script::Cyrillic;
 }
 // Forwards to the file-local Russian break-point algorithm.
 std::vector<size_t> RussianHyphenator::breakIndexes(const std::vector<CodepointInfo>& cps) const {
  return russianBreakIndexes(cps);
 }
--- a/lib/Epub/Epub/hyphenation/RussianHyphenator.h
+++ b/lib/Epub/Epub/hyphenation/RussianHyphenator.h
@ -0,0 +1,14 @@
 #pragma once
 #include "LanguageHyphenator.h"
 // LanguageHyphenator implementation for Cyrillic-script (Russian) words.
 // Only reachable through instance(); the constructor is private.
 class RussianHyphenator final : public LanguageHyphenator {
 public:
  // Returns the single shared instance.
  static const RussianHyphenator& instance();
  // Script this hyphenator handles.
  Script script() const override;
  // Candidate break positions, expressed as code-point indexes into `cps`.
  std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const override;
 private:
  // Private so that instance() is the only way to obtain an object.
  RussianHyphenator() = default;
 };