diff --git a/lib/Epub/Epub/EpubHtmlParserSlim.cpp b/lib/Epub/Epub/EpubHtmlParserSlim.cpp
index 420e8a8..9d7ef52 100644
--- a/lib/Epub/Epub/EpubHtmlParserSlim.cpp
+++ b/lib/Epub/Epub/EpubHtmlParserSlim.cpp
@@ -117,13 +117,21 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
return;
}
+ EpdFontStyle fontStyle = REGULAR;
+ if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
+ fontStyle = BOLD_ITALIC;
+ } else if (self->boldUntilDepth < self->depth) {
+ fontStyle = BOLD;
+ } else if (self->italicUntilDepth < self->depth) {
+ fontStyle = ITALIC;
+ }
+
for (int i = 0; i < len; i++) {
if (isWhitespace(s[i])) {
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
- self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)),
- self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth);
+ self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
// Skip the whitespace char
@@ -133,8 +141,7 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
- self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)),
- self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth);
+ self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
@@ -156,9 +163,17 @@ void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name
matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1;
if (shouldBreakText) {
+ EpdFontStyle fontStyle = REGULAR;
+ if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) {
+ fontStyle = BOLD_ITALIC;
+ } else if (self->boldUntilDepth < self->depth) {
+ fontStyle = BOLD;
+ } else if (self->italicUntilDepth < self->depth) {
+ fontStyle = ITALIC;
+ }
+
self->partWordBuffer[self->partWordBufferIndex] = '\0';
- self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)),
- self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth);
+ self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle);
self->partWordBufferIndex = 0;
}
}
@@ -263,7 +278,7 @@ void EpubHtmlParserSlim::makePages() {
// Long running task, make sure to let other things happen
vTaskDelay(1);
- const auto lines = currentTextBlock->splitIntoLines(renderer, fontId, marginLeft + marginRight);
+ const auto lines = currentTextBlock->layoutAndExtractLines(renderer, fontId, marginLeft + marginRight);
for (auto&& line : lines) {
if (currentPageNextY + lineHeight > pageHeight) {
diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
index f8d9b9b..8e050a5 100644
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@@ -1,144 +1,137 @@
#include "ParsedText.h"
#include
-#include
+#include
+#include
+#include
#include
-void ParsedText::addWord(std::string word, const bool is_bold, const bool is_italic) {
- if (word.length() == 0) return;
+constexpr int MAX_COST = std::numeric_limits::max();
+
+void ParsedText::addWord(std::string word, const EpdFontStyle fontStyle) {
+ if (word.empty()) return;
words.push_back(std::move(word));
- wordStyles.push_back((is_bold ? TextBlock::BOLD_SPAN : 0) | (is_italic ? TextBlock::ITALIC_SPAN : 0));
+ wordStyles.push_back(fontStyle);
}
-// Consumes data
-std::list> ParsedText::splitIntoLines(const GfxRenderer& renderer, const int fontId,
- const int horizontalMargin) {
- const int totalWordCount = words.size();
- const int pageWidth = GfxRenderer::getScreenWidth() - horizontalMargin;
+// Consumes data to minimize memory usage
+std::list> ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId,
+ const int horizontalMargin) {
+ if (words.empty()) {
+ return {};
+ }
+
+ const size_t totalWordCount = words.size();
+ const int pageWidth = renderer.getScreenWidth() - horizontalMargin;
const int spaceWidth = renderer.getSpaceWidth(fontId);
- // measure each word
std::vector wordWidths;
- {
- auto wordsIt = words.begin();
- auto wordStylesIt = wordStyles.begin();
- while (wordsIt != words.end() && wordStylesIt != wordStyles.end()) {
- // measure the word
- EpdFontStyle fontStyle = REGULAR;
- if (*wordStylesIt & TextBlock::BOLD_SPAN) {
- if (*wordStylesIt & TextBlock::ITALIC_SPAN) {
- fontStyle = BOLD_ITALIC;
- } else {
- fontStyle = BOLD;
- }
- } else if (*wordStylesIt & TextBlock::ITALIC_SPAN) {
- fontStyle = ITALIC;
- }
- const int width = renderer.getTextWidth(fontId, wordsIt->c_str(), fontStyle);
- wordWidths.push_back(width);
- std::advance(wordsIt, 1);
- std::advance(wordStylesIt, 1);
- }
+ wordWidths.reserve(totalWordCount);
+
+ auto wordsIt = words.begin();
+ auto wordStylesIt = wordStyles.begin();
+
+ while (wordsIt != words.end()) {
+ wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt));
+
+ std::advance(wordsIt, 1);
+ std::advance(wordStylesIt, 1);
}
- // Array in which ans[i] store index of last word in line starting with word
- // word[i]
- size_t ans[totalWordCount];
- {
- // now apply the dynamic programming algorithm to find the best line breaks
- // DP table in which dp[i] represents cost of line starting with word words[i]
- int dp[totalWordCount];
+ // DP table to store the minimum badness (cost) of lines starting at index i
+ std::vector dp(totalWordCount);
+ // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
+ std::vector ans(totalWordCount);
- // If only one word is present then only one line is required. Cost of last
- // line is zero. Hence cost of this line is zero. Ending point is also n-1 as
- // single word is present
- dp[totalWordCount - 1] = 0;
- ans[totalWordCount - 1] = totalWordCount - 1;
+ // Base case: a line starting at the last word holds only that word and, being the final line, costs nothing
+ dp[totalWordCount - 1] = 0;
+ ans[totalWordCount - 1] = totalWordCount - 1;
- // Make each word first word of line by iterating over each index in arr.
- for (int i = totalWordCount - 2; i >= 0; i--) {
- int currlen = -1;
- dp[i] = INT_MAX;
+ for (int i = totalWordCount - 2; i >= 0; --i) {
+ int currlen = -spaceWidth;
+ dp[i] = MAX_COST;
+
+ for (size_t j = i; j < totalWordCount; ++j) {
+ // Current line length: previous width + space + current word width
+ currlen += wordWidths[j] + spaceWidth;
+
+ if (currlen > pageWidth) {
+ break;
+ }
- // Variable to store possible minimum cost of line.
int cost;
+ if (j == totalWordCount - 1) {
+ cost = 0; // Last line
+ } else {
+ const int remainingSpace = pageWidth - currlen;
+ // Accumulate in long long: the squared slack plus dp[j + 1] (which may already be MAX_COST) can exceed int
+ const long long cost_ll = static_cast(remainingSpace) * remainingSpace + dp[j + 1];
- // Keep on adding words in current line by iterating from starting word upto
- // last word in arr.
- for (int j = i; j < totalWordCount; j++) {
- // Update the width of the words in current line + the space between two
- // words.
- currlen += wordWidths[j] + spaceWidth;
-
- // If we're bigger than the current pagewidth then we can't add more words
- if (currlen > pageWidth) break;
-
- // if we've run out of words then this is last line and the cost should be
- // 0 Otherwise the cost is the sqaure of the left over space + the costs
- // of all the previous lines
- if (j == totalWordCount - 1)
- cost = 0;
- else
- cost = (pageWidth - currlen) * (pageWidth - currlen) + dp[j + 1];
-
- // Check if this arrangement gives minimum cost for line starting with
- // word words[i].
- if (cost < dp[i]) {
- dp[i] = cost;
- ans[i] = j;
+ if (cost_ll > MAX_COST) {
+ cost = MAX_COST;
+ } else {
+ cost = static_cast(cost_ll);
}
}
+
+ if (cost < dp[i]) {
+ dp[i] = cost;
+ ans[i] = j; // j is the index of the last word in this optimal line
+ }
}
}
- // We can now iterate through the answer to find the line break positions
- std::list lineBreaks;
- for (size_t i = 0; i < totalWordCount;) {
- i = ans[i] + 1;
- if (i > totalWordCount) {
- break;
- }
- lineBreaks.push_back(i);
- // Text too big, just exit
- if (lineBreaks.size() > 1000) {
+ // Stores the index of the word that starts the next line (last_word_index + 1)
+ std::vector lineBreakIndices;
+ size_t currentWordIndex = 0;
+ constexpr size_t MAX_LINES = 1000;
+
+ while (currentWordIndex < totalWordCount) {
+ if (lineBreakIndices.size() >= MAX_LINES) {
break;
}
+
+ size_t nextBreakIndex = ans[currentWordIndex] + 1;
+ lineBreakIndices.push_back(nextBreakIndex);
+
+ currentWordIndex = nextBreakIndex;
}
std::list> lines;
- // With the line breaks calculated we can now position the words along the
- // line
+ // Initialize iterators for consumption
auto wordStartIt = words.begin();
auto wordStyleStartIt = wordStyles.begin();
- auto wordWidthStartIt = wordWidths.begin();
- uint16_t lastBreakAt = 0;
- for (const auto lineBreak : lineBreaks) {
- const int lineWordCount = lineBreak - lastBreakAt;
+ size_t wordWidthIndex = 0;
+ size_t lastBreakAt = 0;
+ for (const size_t lineBreak : lineBreakIndices) {
+ const size_t lineWordCount = lineBreak - lastBreakAt;
+
+ // Calculate end iterators for the range to splice
auto wordEndIt = wordStartIt;
auto wordStyleEndIt = wordStyleStartIt;
- auto wordWidthEndIt = wordWidthStartIt;
std::advance(wordEndIt, lineWordCount);
std::advance(wordStyleEndIt, lineWordCount);
- std::advance(wordWidthEndIt, lineWordCount);
+ // Calculate total word width for this line
int lineWordWidthSum = 0;
- for (auto it = wordWidthStartIt; it != wordWidthEndIt; std::advance(it, 1)) {
- lineWordWidthSum += *it;
+ for (size_t i = 0; i < lineWordCount; ++i) {
+ lineWordWidthSum += wordWidths[wordWidthIndex + i];
}
- // Calculate spacing between words
- const uint16_t spareSpace = pageWidth - lineWordWidthSum;
- uint16_t spacing = spaceWidth;
- // evenly space words if using justified style, not the last line, and at
- // least 2 words
- if (style == TextBlock::JUSTIFIED && lineBreak != lineBreaks.back() && lineWordCount >= 2) {
+ // Calculate spacing
+ const int spareSpace = pageWidth - lineWordWidthSum;
+ int spacing = spaceWidth;
+ const bool isLastLine = lineBreak == totalWordCount;
+
+ if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
spacing = spareSpace / (lineWordCount - 1);
}
+ // Calculate initial x position
uint16_t xpos = 0;
if (style == TextBlock::RIGHT_ALIGN) {
xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
@@ -146,24 +139,27 @@ std::list> ParsedText::splitIntoLines(const GfxRender
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
}
+ // Pre-calculate X positions for words
std::list lineXPos;
-
- for (auto it = wordWidthStartIt; it != wordWidthEndIt; std::advance(it, 1)) {
+ for (size_t i = 0; i < lineWordCount; ++i) {
+ const uint16_t currentWordWidth = wordWidths[wordWidthIndex + i];
lineXPos.push_back(xpos);
- xpos += *it + spacing;
+ xpos += currentWordWidth + spacing;
}
+ // Move this line's words and styles out of the member lists with splice, consuming them instead of copying
std::list lineWords;
- std::list lineWordStyles;
lineWords.splice(lineWords.begin(), words, wordStartIt, wordEndIt);
+ std::list lineWordStyles;
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyleStartIt, wordStyleEndIt);
lines.push_back(
std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
+ // Advance the list iterators and the width index to the start of the next line
wordStartIt = wordEndIt;
wordStyleStartIt = wordStyleEndIt;
- wordWidthStartIt = wordWidthEndIt;
+ wordWidthIndex += lineWordCount;
lastBreakAt = lineBreak;
}
diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h
index 13fe815..05f2532 100644
--- a/lib/Epub/Epub/ParsedText.h
+++ b/lib/Epub/Epub/ParsedText.h
@@ -1,24 +1,29 @@
#pragma once
+
+#include
+
+#include
#include
+#include
#include
#include "blocks/TextBlock.h"
+class GfxRenderer;
+
class ParsedText {
std::list words;
- std::list wordStyles;
-
- // the style of the block - left, center, right aligned
+ std::list wordStyles;
TextBlock::BLOCK_STYLE style;
public:
explicit ParsedText(const TextBlock::BLOCK_STYLE style) : style(style) {}
- explicit ParsedText(std::list words, std::list word_styles, const TextBlock::BLOCK_STYLE style)
- : words(std::move(words)), wordStyles(std::move(word_styles)), style(style) {}
~ParsedText() = default;
- void addWord(std::string word, bool is_bold, bool is_italic);
+
+ void addWord(std::string word, EpdFontStyle fontStyle);
void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; }
TextBlock::BLOCK_STYLE getStyle() const { return style; }
bool isEmpty() const { return words.empty(); }
- std::list> splitIntoLines(const GfxRenderer& renderer, int fontId, int horizontalMargin);
+ std::list> layoutAndExtractLines(const GfxRenderer& renderer, int fontId,
+ int horizontalMargin);
};
diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp
index 42d5d2e..cc3cb60 100644
--- a/lib/Epub/Epub/blocks/TextBlock.cpp
+++ b/lib/Epub/Epub/blocks/TextBlock.cpp
@@ -3,29 +3,13 @@
#include
#include
-void TextBlock::addWord(std::string word, const bool is_bold, const bool is_italic) {
- if (word.length() == 0) return;
-
- words.push_back(std::move(word));
- wordStyles.push_back((is_bold ? BOLD_SPAN : 0) | (is_italic ? ITALIC_SPAN : 0));
-}
-
void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int x, const int y) const {
auto wordIt = words.begin();
auto wordStylesIt = wordStyles.begin();
auto wordXposIt = wordXpos.begin();
for (int i = 0; i < words.size(); i++) {
- // render the word
- EpdFontStyle fontStyle = REGULAR;
- if (*wordStylesIt & BOLD_SPAN && *wordStylesIt & ITALIC_SPAN) {
- fontStyle = BOLD_ITALIC;
- } else if (*wordStylesIt & BOLD_SPAN) {
- fontStyle = BOLD;
- } else if (*wordStylesIt & ITALIC_SPAN) {
- fontStyle = ITALIC;
- }
- renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, fontStyle);
+ renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt);
std::advance(wordIt, 1);
std::advance(wordStylesIt, 1);
@@ -57,7 +41,7 @@ std::unique_ptr TextBlock::deserialize(std::istream& is) {
uint32_t wc, xc, sc;
std::list words;
std::list wordXpos;
- std::list wordStyles;
+ std::list wordStyles;
BLOCK_STYLE style;
// words
diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h
index b5a4921..4b2b031 100644
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@@ -1,4 +1,6 @@
#pragma once
+#include
+
#include
#include
#include
@@ -8,11 +10,6 @@
// represents a block of words in the html document
class TextBlock final : public Block {
public:
- enum SPAN_STYLE : uint8_t {
- BOLD_SPAN = 1,
- ITALIC_SPAN = 2,
- };
-
enum BLOCK_STYLE : uint8_t {
JUSTIFIED = 0,
LEFT_ALIGN = 1,
@@ -23,19 +20,14 @@ class TextBlock final : public Block {
private:
std::list words;
std::list wordXpos;
- std::list wordStyles;
-
- // the style of the block - left, center, right aligned
+ std::list wordStyles;
BLOCK_STYLE style;
public:
- explicit TextBlock(const BLOCK_STYLE style) : style(style) {}
- explicit TextBlock(std::list words, std::list word_xpos,
- // the styles of each word
- std::list word_styles, const BLOCK_STYLE style)
+ explicit TextBlock(std::list words, std::list word_xpos, std::list word_styles,
+ const BLOCK_STYLE style)
: words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {}
~TextBlock() override = default;
- void addWord(std::string word, bool is_bold, bool is_italic);
void setStyle(const BLOCK_STYLE style) { this->style = style; }
BLOCK_STYLE getStyle() const { return style; }
bool isEmpty() override { return words.empty(); }
diff --git a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h
index a927bbb..7482cb9 100644
--- a/lib/GfxRenderer/GfxRenderer.h
+++ b/lib/GfxRenderer/GfxRenderer.h
@@ -1,11 +1,10 @@
#pragma once
#include
+#include
#include