Refactor ParsedText to remove PrefixWidthCache and simplify word width calculations for improved clarity and performance

2026-02-04 22:57:50 +03:00 · 2025-12-26 01:47:40 +05:00 · 2025-12-26 01:47:40 +05:00 · 73c8b17737
commit 73c8b17737
parent 4acdad44ff
2 changed files with 33 additions and 75 deletions
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@ -8,15 +8,12 @@
 #include <functional>
 #include <iterator>
 #include <limits>
-#include <unordered_map>
 #include <vector>

 constexpr int MAX_COST = std::numeric_limits<int>::max();

 namespace {

-using PrefixWidthCache = ParsedText::PrefixWidthCache;
-
 struct HyphenSplitDecision {
  size_t byteOffset;
  uint16_t prefixWidth;
@ -27,24 +24,9 @@ struct HyphenationGuard {
  size_t tailIndex;
 };

-uint16_t cachedPrefixWidth(PrefixWidthCache& cache, const GfxRenderer& renderer, const int fontId,
-                           const std::string& word, const EpdFontStyle style, const size_t prefixBytes) {
-  const void* wordKey = static_cast<const void*>(&word);
-  auto& offsetMap = cache[wordKey];
-  const auto it = offsetMap.find(prefixBytes);
-  if (it != offsetMap.end()) {
-    return it->second;
-  }
-
-  const std::string prefix = word.substr(0, prefixBytes);
-  const uint16_t width = renderer.getTextWidth(fontId, prefix.c_str(), style);
-  offsetMap.emplace(prefixBytes, width);
-  return width;
-}
-
 bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const std::string& word,
                         const EpdFontStyle style, const int availableWidth, const bool includeFallback,
-                         PrefixWidthCache& cache, HyphenSplitDecision* decision) {
+                         HyphenSplitDecision* decision) {
  if (!decision || availableWidth <= 0) {
    return false;
  }
@ -64,7 +46,8 @@ bool chooseSplitForWidth(const GfxRenderer& renderer, const int fontId, const st
  uint16_t chosenWidth = 0;

  for (const size_t offset : offsets) {
-    const int prefixWidth = cachedPrefixWidth(cache, renderer, fontId, word, style, offset);
+    const std::string prefix = word.substr(0, offset);
+    const int prefixWidth = renderer.getTextWidth(fontId, prefix.c_str(), style);
    if (prefixWidth <= adjustedWidth) {
      chosenOffset = offset;
      chosenWidth = static_cast<uint16_t>(prefixWidth + hyphenWidth);
@ -101,9 +84,9 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo

  const int pageWidth = renderer.getScreenWidth() - horizontalMargin;
  const int spaceWidth = renderer.getSpaceWidth(fontId);
-  PrefixWidthCache prefixWidthCache;
-  auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth, prefixWidthCache);
-  auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths, prefixWidthCache);
+  // Pre-split oversized tokens so the DP step always has feasible line candidates.
+  auto wordWidths = calculateWordWidths(renderer, fontId, pageWidth);
+  auto lineBreakIndices = computeLineBreaks(renderer, fontId, pageWidth, spaceWidth, wordWidths);
  const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;

  for (size_t i = 0; i < lineCount; ++i) {
@ -112,7 +95,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
 }

 std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId,
-                                                      const int pageWidth, PrefixWidthCache& cache) {
+                                                      const int pageWidth) {
  const size_t totalWordCount = words.size();

  std::vector<uint16_t> wordWidths;
@ -131,9 +114,8 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
    uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt);

    if (width > pageWidth) {
-      // Pre-split oversized tokens so the DP step always has feasible line candidates.
      HyphenSplitDecision decision;
-      if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, cache, &decision)) {
+      if (chooseSplitForWidth(renderer, fontId, *wordsIt, *wordStylesIt, pageWidth, true, &decision)) {
        const std::string originalWord = *wordsIt;
        const std::string tail = originalWord.substr(decision.byteOffset);
        if (tail.empty()) {
@ -164,14 +146,12 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
 }

 std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth,
-                                                  const int spaceWidth, std::vector<uint16_t>& wordWidths,
-                                                  PrefixWidthCache& cache) {
+                                                  const int spaceWidth, std::vector<uint16_t>& wordWidths) {
  if (words.empty()) {
    return {};
  }

  std::vector<HyphenationGuard> guards;
-  std::vector<int> lineWordWidthSums;

  auto shiftGuardIndices = [&](size_t insertPos) {
    for (auto& guard : guards) {
@ -187,17 +167,8 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
  auto runDp = [&](std::vector<size_t>& lineBreaks) {
    const size_t totalWordCount = wordWidths.size();

-    // DP table to store the minimum badness (cost) of lines starting at index i
    std::vector<int> dp(totalWordCount);
-    // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
    std::vector<size_t> ans(totalWordCount);
-    lineWordWidthSums.assign(totalWordCount, 0);
-
-    if (!wordWidths.empty()) {
-      lineWordWidthSums.back() = wordWidths.back();
-    }
-
-    // Base Case

    dp[totalWordCount - 1] = 0;
    ans[totalWordCount - 1] = totalWordCount - 1;
@ -207,7 +178,6 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
      dp[i] = MAX_COST;

      for (size_t j = i; j < totalWordCount; ++j) {
-        // Current line length: previous width + space + current word width
        currlen += wordWidths[j] + spaceWidth;

        if (currlen > pageWidth) {
@ -230,17 +200,13 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
          cost = 0;
        } else {
          const int remainingSpace = pageWidth - currlen;
-          // Use long long for the square to prevent overflow
          const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
          cost = cost_ll > MAX_COST ? MAX_COST : static_cast<int>(cost_ll);
        }

        if (cost < dp[i]) {
          dp[i] = cost;
-          ans[i] = j; // j is the index of the last word in this optimal line
-          const size_t wordsInLine = j - i + 1;
-          const int spacesWidth = wordsInLine > 1 ? static_cast<int>(wordsInLine - 1) * spaceWidth : 0;
-          lineWordWidthSums[i] = currlen - spacesWidth;
+          ans[i] = j;
        }
      }
    }
@ -273,7 +239,10 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
      const bool isLastLine = lineIdx == lineBreakIndices.size() - 1;
      const size_t lineWordCount = lineBreak - lastBreakAt;

-      const int lineWordWidthSum = (lastBreakAt < lineWordWidthSums.size()) ? lineWordWidthSums[lastBreakAt] : 0;
+      int lineWordWidthSum = 0;
+      for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
+        lineWordWidthSum += wordWidths[idx];
+      }
      lastBreakAt = lineBreak;

      if (isLastLine || lineBreak >= wordWidths.size()) {
@ -299,7 +268,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
      }

      HyphenSplitDecision decision;
-      if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, cache, &decision)) {
+      if (!chooseSplitForWidth(renderer, fontId, *nextWordIt, *nextStyleIt, budgetForPrefix, false, &decision)) {
        continue;
      }

@ -338,23 +307,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
  const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
  const size_t lineWordCount = lineBreak - lastBreakAt;

-  // Calculate total word width for this line
  int lineWordWidthSum = 0;
  for (size_t idx = lastBreakAt; idx < lineBreak; ++idx) {
    lineWordWidthSum += wordWidths[idx];
  }
+  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;

-  // Calculate spacing
  const int spareSpace = pageWidth - lineWordWidthSum;

  int spacing = spaceWidth;
-  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
-
  if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
    spacing = spareSpace / (lineWordCount - 1);
  }

-  // Calculate initial x position
  uint16_t xpos = 0;
  if (style == TextBlock::RIGHT_ALIGN) {
    xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
@ -362,7 +327,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
    xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
  }

-  // Pre-calculate X positions for words
  std::list<uint16_t> lineXPos;
  for (size_t i = lastBreakAt; i < lineBreak; i++) {
    const uint16_t currentWordWidth = wordWidths[i];
@ -370,13 +334,11 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
    xpos += currentWordWidth + spacing;
  }

-  // Iterators always start at the beginning as we are moving content with splice below
  auto wordEndIt = words.begin();
  auto wordStyleEndIt = wordStyles.begin();
  std::advance(wordEndIt, lineWordCount);
  std::advance(wordStyleEndIt, lineWordCount);

-  // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
  std::list<std::string> lineWords;
  lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
  std::list<EpdFontStyle> lineWordStyles;
--- a/lib/Epub/Epub/ParsedText.h
+++ b/lib/Epub/Epub/ParsedText.h
@ -6,7 +6,6 @@
 #include <list>
 #include <memory>
 #include <string>
-#include <unordered_map>
 #include <vector>

 #include "blocks/TextBlock.h"
@ -14,23 +13,6 @@
 class GfxRenderer;

 class ParsedText {
- public:
-  using PrefixWidthCache = std::unordered_map<const void*, std::unordered_map<size_t, uint16_t>>;
-
-  explicit ParsedText(const TextBlock::BLOCK_STYLE style, bool extraParagraphSpacing, bool hyphenationEnabled)
-      : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {}
-  ~ParsedText() = default;
-
-  void addWord(std::string word, EpdFontStyle fontStyle);
-  void setStyle(TextBlock::BLOCK_STYLE style) { this->style = style; }
-  TextBlock::BLOCK_STYLE getStyle() const { return style; }
-  size_t size() const { return words.size(); }
-  bool isEmpty() const { return words.empty(); }
-  void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin,
-                             const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
-                             bool includeLastLine = true);
-
- private:
  std::list<std::string> words;
  std::list<EpdFontStyle> wordStyles;
  TextBlock::BLOCK_STYLE style;
@ -38,10 +20,24 @@ class ParsedText {
  bool hyphenationEnabled;

  std::vector<size_t> computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth, int spaceWidth,
-                                        std::vector<uint16_t>& wordWidths, PrefixWidthCache& cache);
+                                        std::vector<uint16_t>& wordWidths);
  void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
                   const std::vector<size_t>& lineBreakIndices,
                   const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
-  std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth,
-                                            PrefixWidthCache& cache);
+  std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId, int pageWidth);
+
+ public:
+  explicit ParsedText(const TextBlock::BLOCK_STYLE style, const bool extraParagraphSpacing,
+                      const bool hyphenationEnabled)
+      : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {}
+  ~ParsedText() = default;
+
+  void addWord(std::string word, EpdFontStyle fontStyle);
+  void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; }
+  TextBlock::BLOCK_STYLE getStyle() const { return style; }
+  size_t size() const { return words.size(); }
+  bool isEmpty() const { return words.empty(); }
+  void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin,
+                             const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
+                             bool includeLastLine = true);
 };