From e46a271c12c31695f41ad5f241d3cf2a3944be50 Mon Sep 17 00:00:00 2001
From: Arthur Tazhitdinov <lisnake@gmail.com>
Date: Wed, 7 Jan 2026 02:14:01 +0500
Subject: [PATCH] Implement hyphenation support and refactor line break
 calculations

---
 lib/Epub/Epub/ParsedText.cpp | 532 +++++++++++++++++++----------------
 lib/Epub/Epub/ParsedText.h   |  18 +-
 2 files changed, 296 insertions(+), 254 deletions(-)
diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
index 9a6162d7..524cdb9b 100644
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@@ -1,7 +1,7 @@
 #include "ParsedText.h"
 
 #include <GfxRenderer.h>
-#include <Utf8.h>
+#include "hyphenation/Hyphenator.h"
 
 #include <algorithm>
 #include <cmath>
@@ -10,95 +10,8 @@
 #include <limits>
 #include <vector>
 
-#include "hyphenation/HyphenationCommon.h"
-#include "hyphenation/Hyphenator.h"
-
 constexpr int MAX_COST = std::numeric_limits<int>::max();
 
-namespace {
-
-struct HyphenSplitDecision {
-  size_t byteOffset;
-  uint16_t prefixWidth;
-  bool appendHyphen;  // true when we must draw an extra hyphen after the prefix glyphs
-};
-
-// Verifies whether the substring ending at `offset` already contains a literal hyphen glyph, so we can avoid
-// drawing a duplicate hyphen when breaking the word.
-bool endsWithExplicitHyphen(const std::string& word, const size_t offset) {
-  if (offset == 0 || offset > word.size()) {
-    return false;
-  }
-
-  const unsigned char* base = reinterpret_cast<const unsigned char*>(word.data());
-  const unsigned char* ptr = base;
-  const unsigned char* target = base + offset;
-  const unsigned char* lastStart = nullptr;
-
-  while (ptr < target) {
-    lastStart = ptr;
-    utf8NextCodepoint(&ptr);
-    if (ptr > target) {
-      return false;
-    }
-  }
-
-  if (!lastStart || ptr != target) {
-    return false;
-  }
-
-  const unsigned char* tmp = lastStart;
-  const uint32_t cp = utf8NextCodepoint(&tmp);  // decode the codepoint immediately prior to the break
-  return isExplicitHyphen(cp);
-}
-
-bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
-                         const EpdFontFamily::Style style, const int availableWidth, const bool includeFallback,
-                         HyphenSplitDecision* decision) {
-  if (!decision || availableWidth <= 0) {
-    return false;
-  }
-
-  const int hyphenWidth = renderer.getTextWidth(fontId, "-", style);
-
-  auto offsets = Hyphenator::breakOffsets(word, includeFallback);
-  if (offsets.empty()) {
-    return false;
-  }
-
-  size_t chosenOffset = std::numeric_limits<size_t>::max();
-  uint16_t chosenWidth = 0;
-  bool chosenAppendHyphen = true;
-
-  for (const size_t offset : offsets) {
-    const bool needsInsertedHyphen = !endsWithExplicitHyphen(word, offset);
-    const int budget = availableWidth - (needsInsertedHyphen ? hyphenWidth : 0);
-    if (budget <= 0) {
-      continue;
-    }
-    const std::string prefix = word.substr(0, offset);
-    const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style);
-    if (prefixWidth <= budget) {
-      chosenOffset = offset;
-      chosenWidth = static_cast<uint16_t>(prefixWidth + (needsInsertedHyphen ? hyphenWidth : 0));
-      chosenAppendHyphen = needsInsertedHyphen;
-    } else {
-      break;
-    }
-  }
-
-  if (chosenOffset == std::numeric_limits<size_t>::max()) {
-    return false;
-  }
-
-  decision->byteOffset = chosenOffset;
-  decision->prefixWidth = chosenWidth;
-  decision->appendHyphen = chosenAppendHyphen;
-  return true;
-}
-
-}  // namespace
-
 void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) {
   if (word.empty()) return;
 
@@ -114,11 +27,20 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
     return;
   }
 
+  // Apply fixed transforms before any per-line layout work.
+  applyParagraphIndent();
+  ensureHyphenationFitsViewport(renderer, fontId, viewportWidth);
+
   const int pageWidth = viewportWidth;
   const int spaceWidth = renderer.getSpaceWidth(fontId);
-  // Pre-split oversized tokens so the DP step always has feasible line candidates.
-  auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth);
-  auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
+  auto wordWidths = calculateWordWidths(renderer, fontId);
+  std::vector<size_t> lineBreakIndices;
+  if (hyphenationEnabled) {
+    // Use greedy layout that can split words mid-loop when a hyphenated prefix fits.
+    lineBreakIndices = computeHyphenatedLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
+  } else {
+    lineBreakIndices = computeLineBreaks(pageWidth, spaceWidth, wordWidths);
+  }
   const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
 
   for (size_t i = 0; i < lineCount; ++i) {
@@ -126,52 +48,17 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
   }
 }
 
-std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId,
-                                                      const int pageWidth) {
+std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
   const size_t totalWordCount = words.size();
 
   std::vector<uint16_t> wordWidths;
   wordWidths.reserve(totalWordCount);
 
-  // add em-space at the beginning of first word in paragraph to indent
-  if (!extraParagraphSpacing) {
-    std::string& first_word = words.front();
-    first_word.insert(0, "\xe2\x80\x83");
-  }
-
   auto wordsIt = words.begin();
   auto wordStylesIt = wordStyles.begin();
 
   while (wordsIt != words.end()) {
-    uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt);
-
-    if (width > pageWidth) {
-      HyphenSplitDecision decision{};
-      if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, &decision)) {
-        const std::string originalWord = *wordsIt;
-        const std::string tail = originalWord.substr(decision.byteOffset);
-        if (tail.empty()) {
-          continue;
-        }
-        std::string prefix = originalWord.substr(0, decision.byteOffset);
-        if (decision.appendHyphen) {
-          prefix += "-";
-        }
-
-        *wordsIt = prefix;
-        auto nextWordIt = words.insert(std::next(wordsIt), tail);
-        auto nextStyleIt = wordStyles.insert(std::next(wordStylesIt), *wordStylesIt);
-        // Continue processing the freshly inserted tail so cascading splits still respect the limit.
-
-        wordWidths.push_back(decision.prefixWidth);
-
-        wordsIt = nextWordIt;
-        wordStylesIt = nextStyleIt;
-        continue;
-      }
-    }
-
-    wordWidths.push_back(width);
+    wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt));
 
     std::advance(wordsIt, 1);
     std::advance(wordStylesIt, 1);
@@ -180,149 +67,289 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
   return wordWidths;
 }
 
-std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
-                                                  const int spaceWidth, std::vector<uint16_t>& wordWidths) {
-  std::vector<size_t> lineBreakIndices;
-  if (words.empty()) {
-    return lineBreakIndices;
-  }
+std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int spaceWidth,
+                                                  const std::vector<uint16_t>& wordWidths) const {
+  const size_t totalWordCount = words.size();
 
-  if (wordWidths.empty()) {
-    return lineBreakIndices;
-  }
+  // DP table to store the minimum badness (cost) of lines starting at index i
+  std::vector<int> dp(totalWordCount);
+  // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
+  std::vector<size_t> ans(totalWordCount);
 
-  constexpr size_t MAX_LINES = 1000;
-  const auto appendLineBreak = [&](size_t index) {
-    const size_t clampedIndex = std::min(index, wordWidths.size());
-    lineBreakIndices.push_back(clampedIndex);
-    return lineBreakIndices.size() < MAX_LINES;
-  };
+  // Base Case
+  dp[totalWordCount - 1] = 0;
+  ans[totalWordCount - 1] = totalWordCount - 1;
 
-  const auto runDp = [&]() {
-    lineBreakIndices.clear();
-    const size_t totalWordCount = wordWidths.size();
-    if (totalWordCount == 0) {
-      return;
-    }
+  for (int i = totalWordCount - 2; i >= 0; --i) {
+    int currlen = -spaceWidth;
+    dp[i] = MAX_COST;
 
-    std::vector<int> dp(totalWordCount);
-    std::vector<size_t> ans(totalWordCount);
-    dp[totalWordCount - 1] = 0;
-    ans[totalWordCount - 1] = totalWordCount - 1;
+    for (size_t j = i; j < totalWordCount; ++j) {
+      // Current line length: previous width + space + current word width
+      currlen += wordWidths[j] + spaceWidth;
 
-    for (int i = static_cast<int>(totalWordCount) - 2; i >= 0; --i) {
-      int currlen = -spaceWidth;
-      dp[i] = MAX_COST;
+      if (currlen > pageWidth) {
+        break;
+      }
 
-      for (size_t j = i; j < totalWordCount; ++j) {
-        currlen += wordWidths[j] + spaceWidth;
-        if (currlen > pageWidth) {
-          break;
-        }
+      int cost;
+      if (j == totalWordCount - 1) {
+        cost = 0;  // Last line
+      } else {
+        const int remainingSpace = pageWidth - currlen;
+        // Use long long for the square to prevent overflow
+        const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
 
-        int cost;
-        if (j == totalWordCount - 1) {
-          cost = 0;
+        if (cost_ll > MAX_COST) {
+          cost = MAX_COST;
         } else {
-          const int remainingSpace = pageWidth - currlen;
-          const long long costLl = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
-          cost = costLl > MAX_COST ? MAX_COST : static_cast<int>(costLl);
+          cost = static_cast<int>(cost_ll);
         }
+      }
 
-        if (cost < dp[i]) {
-          dp[i] = cost;
-          ans[i] = j;
-        }
+      if (cost < dp[i]) {
+        dp[i] = cost;
+        ans[i] = j;  // j is the index of the last word in this optimal line
       }
     }
 
-    size_t currentWordIndex = 0;
-    while (currentWordIndex < totalWordCount && lineBreakIndices.size() < MAX_LINES) {
-      const size_t nextBreakIndex = ans[currentWordIndex] + 1;
-      if (!appendLineBreak(nextBreakIndex)) {
-        break;
+    // Handle oversized word: if no valid configuration found, force single-word line
+    // This prevents cascade failure where one oversized word breaks all preceding words
+    if (dp[i] == MAX_COST) {
+      ans[i] = i;  // Just this word on its own line
+      // Inherit cost from next word to allow subsequent words to find valid configurations
+      if (i + 1 < static_cast<int>(totalWordCount)) {
+        dp[i] = dp[i + 1];
+      } else {
+        dp[i] = 0;
       }
-      currentWordIndex = nextBreakIndex;
     }
-  };
+  }
 
-  const auto runHyphenatedLayout = [&]() {
-    size_t currentIndex = 0;
-    bool continueLayout = true;
+  // Stores the index of the word that starts the next line (last_word_index + 1)
+  std::vector<size_t> lineBreakIndices;
+  size_t currentWordIndex = 0;
 
-    while (currentIndex < wordWidths.size() && continueLayout) {
-      int lineWidth = 0;
-      size_t wordsOnLine = 0;
+  while (currentWordIndex < totalWordCount) {
+    size_t nextBreakIndex = ans[currentWordIndex] + 1;
 
-      while (currentIndex < wordWidths.size()) {
-        const int interWordSpace = (wordsOnLine == 0) ? 0 : spaceWidth;
-        const int projectedWidth = lineWidth + interWordSpace + wordWidths[currentIndex];
-
-        if (projectedWidth <= pageWidth) {
-          lineWidth = projectedWidth;
-          ++currentIndex;
-          ++wordsOnLine;
-          continue;
-        }
-
-        auto wordNodeIt = words.begin();
-        auto styleNodeIt = wordStyles.begin();
-        std::advance(wordNodeIt, currentIndex);
-        std::advance(styleNodeIt, currentIndex);
-        if (wordNodeIt == words.end()) {
-          break;
-        }
-
-        const int availableWidth = pageWidth - lineWidth - interWordSpace;
-        HyphenSplitDecision decision{};
-        if (!chooseSplitForWidth(renderer, fontId, *wordNodeIt, *styleNodeIt, availableWidth, false, &decision)) {
-          break;
-        }
-
-        const std::string originalWord = *wordNodeIt;
-        const std::string tail = originalWord.substr(decision.byteOffset);
-        if (tail.empty()) {
-          break;
-        }
-        std::string prefix = originalWord.substr(0, decision.byteOffset);
-        if (decision.appendHyphen) {
-          prefix += "-";
-        }
-
-        const EpdFontFamily::Style styleForSplit = *styleNodeIt;
-        *wordNodeIt = tail;
-        words.insert(wordNodeIt, prefix);
-        wordStyles.insert(styleNodeIt, styleForSplit);
-
-        const uint16_t tailWidth = renderer.getTextWidth(fontId, tail.c_str(), styleForSplit);
-        wordWidths.insert(wordWidths.begin() + currentIndex, decision.prefixWidth);
-        wordWidths[currentIndex + 1] = tailWidth;
-
-        lineWidth += interWordSpace + decision.prefixWidth;
-        ++currentIndex;
-        ++wordsOnLine;
-        break;
-      }
-
-      if (wordsOnLine == 0) {
-        ++currentIndex;
-      }
-
-      continueLayout = appendLineBreak(currentIndex);
+    // Safety check: prevent infinite loop if nextBreakIndex doesn't advance
+    if (nextBreakIndex <= currentWordIndex) {
+      // Force advance by at least one word to avoid infinite loop
+      nextBreakIndex = currentWordIndex + 1;
     }
-  };
 
-  if (hyphenationEnabled) {
-    // Simple greedy layout with hyphenation
-    runHyphenatedLayout();
-  } else {
-    // TeX-like optimal layout without hyphenation
-    runDp();
+    lineBreakIndices.push_back(nextBreakIndex);
+    currentWordIndex = nextBreakIndex;
   }
 
   return lineBreakIndices;
 }
 
+void ParsedText::applyParagraphIndent() {
+  if (extraParagraphSpacing || words.empty()) {
+    return;
+  }
+
+  words.front().insert(0, "\xe2\x80\x83");
+}
+
+void ParsedText::ensureHyphenationFitsViewport(const GfxRenderer& renderer, const int fontId,
+                                               const int viewportWidth) {
+  if (!hyphenationEnabled || viewportWidth <= 0) {
+    return;
+  }
+
+  auto wordIt = words.begin();
+  auto styleIt = wordStyles.begin();
+
+  while (wordIt != words.end() && styleIt != wordStyles.end()) {
+    const int wordWidth = renderer.getTextWidth(fontId, wordIt->c_str(), *styleIt);
+    if (wordWidth <= viewportWidth) {
+      ++wordIt;
+      ++styleIt;
+      continue;
+    }
+
+    auto chunks = hyphenateWordToFit(*wordIt, renderer, fontId, *styleIt, viewportWidth);
+    if (chunks.empty()) {
+      ++wordIt;
+      ++styleIt;
+      continue;
+    }
+
+    *wordIt = chunks.front();
+
+    auto wordInsertPos = std::next(wordIt);
+    auto styleInsertPos = std::next(styleIt);
+    for (size_t idx = 1; idx < chunks.size(); ++idx) {
+      wordInsertPos = words.insert(wordInsertPos, chunks[idx]);
+      styleInsertPos = wordStyles.insert(styleInsertPos, *styleIt);
+      ++wordInsertPos;
+      ++styleInsertPos;
+    }
+
+    ++wordIt;
+    ++styleIt;
+  }
+}
+
+std::vector<std::string> ParsedText::hyphenateWordToFit(const std::string& word, const GfxRenderer& renderer,
+                                                        const int fontId, const EpdFontFamily::Style style,
+                                                        const int viewportWidth) const {
+  std::vector<std::string> chunks;
+  if (word.empty() || viewportWidth <= 0) {
+    return chunks;
+  }
+
+  const int hyphenWidth = renderer.getTextWidth(fontId, "-", style);
+  std::string remaining = word;
+
+  while (!remaining.empty()) {
+    const int remainingWidth = renderer.getTextWidth(fontId, remaining.c_str(), style);
+    if (remainingWidth <= viewportWidth) {
+      chunks.push_back(remaining);
+      break;
+    }
+
+    const auto breakOffsets = Hyphenator::breakOffsets(remaining, true);
+    if (breakOffsets.empty()) {
+      return {};
+    }
+
+    size_t chosenOffset = 0;
+    int chosenWidth = -1;
+    for (const size_t offset : breakOffsets) {
+      if (offset == 0 || offset >= remaining.size()) {
+        continue;
+      }
+
+      const std::string candidate = remaining.substr(0, offset);
+      const int candidateWidth = renderer.getTextWidth(fontId, candidate.c_str(), style) + hyphenWidth;
+      if (candidateWidth > viewportWidth) {
+        continue;
+      }
+
+      if (candidateWidth > chosenWidth) {
+        chosenWidth = candidateWidth;
+        chosenOffset = offset;
+      }
+    }
+
+    if (chosenWidth < 0) {
+      return {};
+    }
+
+    std::string chunk = remaining.substr(0, chosenOffset);
+    chunk.push_back('-');
+    chunks.push_back(std::move(chunk));
+    remaining.erase(0, chosenOffset);
+  }
+
+  return chunks;
+}
+
+// Builds break indices while opportunistically splitting the word that would overflow the current line.
+std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId,
+                                                            const int pageWidth, const int spaceWidth,
+                                                            std::vector<uint16_t>& wordWidths) {
+  std::vector<size_t> lineBreakIndices;
+  size_t currentIndex = 0;
+
+  while (currentIndex < wordWidths.size()) {
+    const size_t lineStart = currentIndex;
+    int lineWidth = 0;
+
+    while (currentIndex < wordWidths.size()) {
+      const bool isFirstWord = currentIndex == lineStart;
+      const int spacing = isFirstWord ? 0 : spaceWidth;
+      const int candidateWidth = spacing + wordWidths[currentIndex];
+
+      if (lineWidth + candidateWidth <= pageWidth) {
+        lineWidth += candidateWidth;
+        currentIndex += 1;
+        continue;
+      }
+
+      const int availableWidth = pageWidth - lineWidth - spacing;
+      if (availableWidth > 0 && hyphenateWordAtIndex(currentIndex, availableWidth, renderer, fontId, wordWidths)) {
+        // Widths updated for the split word; retry with current index
+        continue;
+      }
+
+      // Word cannot fit on current line (with or without hyphenation).
+      if (currentIndex == lineStart) {
+        // Force place at least one word to prevent stalling.
+        lineWidth += candidateWidth;
+        currentIndex += 1;
+      }
+      break;
+    }
+
+    lineBreakIndices.push_back(currentIndex);
+  }
+
+  return lineBreakIndices;
+}
+
+// Splits words[wordIndex] into prefix+hyphen and remainder when a legal breakpoint fits the available width.
+bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availableWidth, const GfxRenderer& renderer,
+                                      const int fontId, std::vector<uint16_t>& wordWidths) {
+  if (availableWidth <= 0 || wordIndex >= words.size()) {
+    return false;
+  }
+
+  auto wordIt = words.begin();
+  auto styleIt = wordStyles.begin();
+  std::advance(wordIt, wordIndex);
+  std::advance(styleIt, wordIndex);
+
+  const auto breakOffsets = Hyphenator::breakOffsets(*wordIt, true);
+  if (breakOffsets.empty()) {
+    return false;
+  }
+
+  const auto style = *styleIt;
+  size_t chosenOffset = 0;
+  int chosenWidth = -1;
+
+  for (const size_t offset : breakOffsets) {
+    if (offset == 0 || offset >= wordIt->size()) {
+      continue;
+    }
+
+    std::string prefix = wordIt->substr(0, offset);
+    prefix.push_back('-');
+    const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style);
+    if (prefixWidth > availableWidth) {
+      continue;
+    }
+
+    if (prefixWidth > chosenWidth) {
+      chosenWidth = prefixWidth;
+      chosenOffset = offset;
+    }
+  }
+
+  if (chosenWidth < 0) {
+    return false;
+  }
+
+  std::string remainder = wordIt->substr(chosenOffset);
+  wordIt->resize(chosenOffset);
+  wordIt->push_back('-');
+
+  auto insertWordIt = std::next(wordIt);
+  auto insertStyleIt = std::next(styleIt);
+  words.insert(insertWordIt, remainder);
+  wordStyles.insert(insertStyleIt, style);
+
+  wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);
+  const uint16_t remainderWidth = renderer.getTextWidth(fontId, remainder.c_str(), style);
+  wordWidths.insert(wordWidths.begin() + wordIndex + 1, remainderWidth);
+  return true;
+}
+
 void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
                              const std::vector<uint16_t>& wordWidths, const std::vector<size_t>& lineBreakIndices,
                              const std::function<void(std::shared_ptr<TextBlock>)>& processLine) {
@@ -330,19 +357,23 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
   const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
   const size_t lineWordCount = lineBreak - lastBreakAt;
 
+  // Calculate total word width for this line
   int lineWordWidthSum = 0;
-  for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
-    lineWordWidthSum += wordWidths[idx];
+  for (size_t i = lastBreakAt; i < lineBreak; i++) {
+    lineWordWidthSum += wordWidths[i];
   }
-  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
 
+  // Calculate spacing
   const int spareSpace = pageWidth - lineWordWidthSum;
 
   int spacing = spaceWidth;
+  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
+
   if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
     spacing = spareSpace / (lineWordCount - 1);
   }
 
+  // Calculate initial x position
   uint16_t xpos = 0;
   if (style == TextBlock::RIGHT_ALIGN) {
     xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
@@ -350,6 +381,7 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
     xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
   }
 
+  // Pre-calculate X positions for words
   std::list<uint16_t> lineXPos;
   for (size_t i = lastBreakAt; i < lineBreak; i++) {
     const uint16_t currentWordWidth = wordWidths[i];
@@ -357,11 +389,13 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
     xpos += currentWordWidth + spacing;
   }
 
+  // Iterators always start at the beginning as we are moving content with splice below
   auto wordEndIt = words.begin();
   auto wordStyleEndIt = wordStyles.begin();
   std::advance(wordEndIt, lineWordCount);
   std::advance(wordStyleEndIt, lineWordCount);
 
+  // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
   std::list<std::string> lineWords;
   lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
   std::list<EpdFontFamily::Style> lineWordStyles;
diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h
index f19b26cb..13f36a22 100644
--- a/lib/Epub/Epub/ParsedText.h
+++ b/lib/Epub/Epub/ParsedText.h
@@ -19,15 +19,23 @@ class ParsedText {
   bool extraParagraphSpacing;
   bool hyphenationEnabled;
 
-  std::vector<size_t> computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth, int spaceWidth,
-                                        std::vector<uint16_t>& wordWidths);
+  void applyParagraphIndent();
+  void ensureHyphenationFitsViewport(const GfxRenderer& renderer, int fontId, int viewportWidth);
+  std::vector<std::string> hyphenateWordToFit(const std::string& word, const GfxRenderer& renderer, int fontId,
+                                             EpdFontFamily::Style style, int viewportWidth) const;
+  std::vector<size_t> computeHyphenatedLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth,
+                                                  int spaceWidth, std::vector<uint16_t>& wordWidths);
+  bool hyphenateWordAtIndex(size_t wordIndex, int availableWidth, const GfxRenderer& renderer, int fontId,
+                            std::vector<uint16_t>& wordWidths);
+  std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths) const;
   void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
                    const std::vector<size_t>& lineBreakIndices,
                    const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
-  std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth);
+  std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId);
 
  public:
-  explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, const bool hyphenationEnabled)
+  explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing,
+                      const bool hyphenationEnabled = false)
       : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {}
   ~ParsedText() = default;
 
@@ -39,4 +47,4 @@ class ParsedText {
   void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth,
                              const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
                              bool includeLastLine = true);
-};
+};
\ No newline at end of file