diff --git a/lib/Epub/Epub/EpubHtmlParserSlim.cpp b/lib/Epub/Epub/EpubHtmlParserSlim.cpp index 420e8a8..9d7ef52 100644 --- a/lib/Epub/Epub/EpubHtmlParserSlim.cpp +++ b/lib/Epub/Epub/EpubHtmlParserSlim.cpp @@ -117,13 +117,21 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s return; } + EpdFontStyle fontStyle = REGULAR; + if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + fontStyle = BOLD_ITALIC; + } else if (self->boldUntilDepth < self->depth) { + fontStyle = BOLD; + } else if (self->italicUntilDepth < self->depth) { + fontStyle = ITALIC; + } + for (int i = 0; i < len; i++) { if (isWhitespace(s[i])) { // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), - self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth); + self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle); self->partWordBufferIndex = 0; } // Skip the whitespace char @@ -133,8 +141,7 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), - self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth); + self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle); self->partWordBufferIndex = 0; } @@ -156,9 +163,17 @@ void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; if (shouldBreakText) { + EpdFontStyle fontStyle = REGULAR; + if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + fontStyle = BOLD_ITALIC; + } else if (self->boldUntilDepth < self->depth) { + fontStyle = BOLD; + } else if (self->italicUntilDepth < self->depth) { + fontStyle = ITALIC; + } + self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), - self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth); + self->currentTextBlock->addWord(std::move(replaceHtmlEntities(self->partWordBuffer)), fontStyle); self->partWordBufferIndex = 0; } } @@ -263,7 +278,7 @@ void EpubHtmlParserSlim::makePages() { // Long running task, make sure to let other things happen vTaskDelay(1); - const auto lines = currentTextBlock->splitIntoLines(renderer, fontId, marginLeft + marginRight); + const auto lines = currentTextBlock->layoutAndExtractLines(renderer, fontId, marginLeft + marginRight); for (auto&& line : lines) { if (currentPageNextY + lineHeight > pageHeight) { diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index f8d9b9b..8e050a5 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -1,144 +1,137 @@ #include "ParsedText.h" #include -#include +#include +#include +#include #include -void ParsedText::addWord(std::string word, const bool is_bold, const bool is_italic) { - if (word.length() == 0) return; +constexpr int MAX_COST = std::numeric_limits::max(); + +void ParsedText::addWord(std::string word, const EpdFontStyle fontStyle) { + if (word.empty()) return; words.push_back(std::move(word)); - wordStyles.push_back((is_bold ? TextBlock::BOLD_SPAN : 0) | (is_italic ? TextBlock::ITALIC_SPAN : 0)); + wordStyles.push_back(fontStyle); } -// Consumes data -std::list> ParsedText::splitIntoLines(const GfxRenderer& renderer, const int fontId, - const int horizontalMargin) { - const int totalWordCount = words.size(); - const int pageWidth = GfxRenderer::getScreenWidth() - horizontalMargin; +// Consumes data to minimize memory usage +std::list> ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, + const int horizontalMargin) { + if (words.empty()) { + return {}; + } + + const size_t totalWordCount = words.size(); + const int pageWidth = renderer.getScreenWidth() - horizontalMargin; const int spaceWidth = renderer.getSpaceWidth(fontId); - // measure each word std::vector wordWidths; - { - auto wordsIt = words.begin(); - auto wordStylesIt = wordStyles.begin(); - while (wordsIt != words.end() && wordStylesIt != wordStyles.end()) { - // measure the word - EpdFontStyle fontStyle = REGULAR; - if (*wordStylesIt & TextBlock::BOLD_SPAN) { - if (*wordStylesIt & TextBlock::ITALIC_SPAN) { - fontStyle = BOLD_ITALIC; - } else { - fontStyle = BOLD; - } - } else if (*wordStylesIt & TextBlock::ITALIC_SPAN) { - fontStyle = ITALIC; - } - const int width = renderer.getTextWidth(fontId, wordsIt->c_str(), fontStyle); - wordWidths.push_back(width); - std::advance(wordsIt, 1); - std::advance(wordStylesIt, 1); - } + wordWidths.reserve(totalWordCount); + + auto wordsIt = words.begin(); + auto wordStylesIt = wordStyles.begin(); + + while (wordsIt != words.end()) { + wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt)); + + std::advance(wordsIt, 1); + std::advance(wordStylesIt, 1); } - // Array in which ans[i] store index of last word in line starting with word - // word[i] - size_t ans[totalWordCount]; - { - // now apply the dynamic programming algorithm to find the best line breaks - // DP table in which dp[i] represents cost of line starting with word words[i] - int dp[totalWordCount]; + // DP table to store the minimum badness (cost) of lines starting at index i + std::vector dp(totalWordCount); + // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i' + std::vector ans(totalWordCount); - // If only one word is present then only one line is required. Cost of last - // line is zero. Hence cost of this line is zero. Ending point is also n-1 as - // single word is present - dp[totalWordCount - 1] = 0; - ans[totalWordCount - 1] = totalWordCount - 1; + // Base Case + dp[totalWordCount - 1] = 0; + ans[totalWordCount - 1] = totalWordCount - 1; - // Make each word first word of line by iterating over each index in arr. - for (int i = totalWordCount - 2; i >= 0; i--) { - int currlen = -1; - dp[i] = INT_MAX; + for (int i = totalWordCount - 2; i >= 0; --i) { + int currlen = -spaceWidth; + dp[i] = MAX_COST; + + for (size_t j = i; j < totalWordCount; ++j) { + // Current line length: previous width + space + current word width + currlen += wordWidths[j] + spaceWidth; + + if (currlen > pageWidth) { + break; + } - // Variable to store possible minimum cost of line. int cost; + if (j == totalWordCount - 1) { + cost = 0; // Last line + } else { + const int remainingSpace = pageWidth - currlen; + // Use long long for the square to prevent overflow + const long long cost_ll = static_cast(remainingSpace) * remainingSpace + dp[j + 1]; - // Keep on adding words in current line by iterating from starting word upto - // last word in arr. - for (int j = i; j < totalWordCount; j++) { - // Update the width of the words in current line + the space between two - // words. - currlen += wordWidths[j] + spaceWidth; - - // If we're bigger than the current pagewidth then we can't add more words - if (currlen > pageWidth) break; - - // if we've run out of words then this is last line and the cost should be - // 0 Otherwise the cost is the sqaure of the left over space + the costs - // of all the previous lines - if (j == totalWordCount - 1) - cost = 0; - else - cost = (pageWidth - currlen) * (pageWidth - currlen) + dp[j + 1]; - - // Check if this arrangement gives minimum cost for line starting with - // word words[i]. - if (cost < dp[i]) { - dp[i] = cost; - ans[i] = j; + if (cost_ll > MAX_COST) { + cost = MAX_COST; + } else { + cost = static_cast(cost_ll); } } + + if (cost < dp[i]) { + dp[i] = cost; + ans[i] = j; // j is the index of the last word in this optimal line + } } } - // We can now iterate through the answer to find the line break positions - std::list lineBreaks; - for (size_t i = 0; i < totalWordCount;) { - i = ans[i] + 1; - if (i > totalWordCount) { - break; - } - lineBreaks.push_back(i); - // Text too big, just exit - if (lineBreaks.size() > 1000) { + // Stores the index of the word that starts the next line (last_word_index + 1) + std::vector lineBreakIndices; + size_t currentWordIndex = 0; + constexpr size_t MAX_LINES = 1000; + + while (currentWordIndex < totalWordCount) { + if (lineBreakIndices.size() >= MAX_LINES) { break; } + + size_t nextBreakIndex = ans[currentWordIndex] + 1; + lineBreakIndices.push_back(nextBreakIndex); + + currentWordIndex = nextBreakIndex; } std::list> lines; - // With the line breaks calculated we can now position the words along the - // line + // Initialize iterators for consumption auto wordStartIt = words.begin(); auto wordStyleStartIt = wordStyles.begin(); - auto wordWidthStartIt = wordWidths.begin(); - uint16_t lastBreakAt = 0; - for (const auto lineBreak : lineBreaks) { - const int lineWordCount = lineBreak - lastBreakAt; + size_t wordWidthIndex = 0; + size_t lastBreakAt = 0; + for (const size_t lineBreak : lineBreakIndices) { + const size_t lineWordCount = lineBreak - lastBreakAt; + + // Calculate end iterators for the range to splice auto wordEndIt = wordStartIt; auto wordStyleEndIt = wordStyleStartIt; - auto wordWidthEndIt = wordWidthStartIt; std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); - std::advance(wordWidthEndIt, lineWordCount); + // Calculate total word width for this line int lineWordWidthSum = 0; - for (auto it = wordWidthStartIt; it != wordWidthEndIt; std::advance(it, 1)) { - lineWordWidthSum += *it; + for (size_t i = 0; i < lineWordCount; ++i) { + lineWordWidthSum += wordWidths[wordWidthIndex + i]; } - // Calculate spacing between words - const uint16_t spareSpace = pageWidth - lineWordWidthSum; - uint16_t spacing = spaceWidth; - // evenly space words if using justified style, not the last line, and at - // least 2 words - if (style == TextBlock::JUSTIFIED && lineBreak != lineBreaks.back() && lineWordCount >= 2) { + // Calculate spacing + const int spareSpace = pageWidth - lineWordWidthSum; + int spacing = spaceWidth; + const bool isLastLine = lineBreak == totalWordCount; + + if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { spacing = spareSpace / (lineWordCount - 1); } + // Calculate initial x position uint16_t xpos = 0; if (style == TextBlock::RIGHT_ALIGN) { xpos = spareSpace - (lineWordCount - 1) * spaceWidth; @@ -146,24 +139,27 @@ std::list> ParsedText::splitIntoLines(const GfxRender xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; } + // Pre-calculate X positions for words std::list lineXPos; - - for (auto it = wordWidthStartIt; it != wordWidthEndIt; std::advance(it, 1)) { + for (size_t i = 0; i < lineWordCount; ++i) { + const uint16_t currentWordWidth = wordWidths[wordWidthIndex + i]; lineXPos.push_back(xpos); - xpos += *it + spacing; + xpos += currentWordWidth + spacing; } + // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; - std::list lineWordStyles; lineWords.splice(lineWords.begin(), words, wordStartIt, wordEndIt); + std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyleStartIt, wordStyleEndIt); lines.push_back( std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); + // Update pointers/indices for the next line wordStartIt = wordEndIt; wordStyleStartIt = wordStyleEndIt; - wordWidthStartIt = wordWidthEndIt; + wordWidthIndex += lineWordCount; lastBreakAt = lineBreak; } diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index 13fe815..05f2532 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -1,24 +1,29 @@ #pragma once + +#include + +#include #include +#include #include #include "blocks/TextBlock.h" +class GfxRenderer; + class ParsedText { std::list words; - std::list wordStyles; - - // the style of the block - left, center, right aligned + std::list wordStyles; TextBlock::BLOCK_STYLE style; public: explicit ParsedText(const TextBlock::BLOCK_STYLE style) : style(style) {} - explicit ParsedText(std::list words, std::list word_styles, const TextBlock::BLOCK_STYLE style) - : words(std::move(words)), wordStyles(std::move(word_styles)), style(style) {} ~ParsedText() = default; - void addWord(std::string word, bool is_bold, bool is_italic); + + void addWord(std::string word, EpdFontStyle fontStyle); void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; } TextBlock::BLOCK_STYLE getStyle() const { return style; } bool isEmpty() const { return words.empty(); } - std::list> splitIntoLines(const GfxRenderer& renderer, int fontId, int horizontalMargin); + std::list> layoutAndExtractLines(const GfxRenderer& renderer, int fontId, + int horizontalMargin); }; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 42d5d2e..cc3cb60 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -3,29 +3,13 @@ #include #include -void TextBlock::addWord(std::string word, const bool is_bold, const bool is_italic) { - if (word.length() == 0) return; - - words.push_back(std::move(word)); - wordStyles.push_back((is_bold ? BOLD_SPAN : 0) | (is_italic ? ITALIC_SPAN : 0)); -} - void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int x, const int y) const { auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); for (int i = 0; i < words.size(); i++) { - // render the word - EpdFontStyle fontStyle = REGULAR; - if (*wordStylesIt & BOLD_SPAN && *wordStylesIt & ITALIC_SPAN) { - fontStyle = BOLD_ITALIC; - } else if (*wordStylesIt & BOLD_SPAN) { - fontStyle = BOLD; - } else if (*wordStylesIt & ITALIC_SPAN) { - fontStyle = ITALIC; - } - renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, fontStyle); + renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt); std::advance(wordIt, 1); std::advance(wordStylesIt, 1); @@ -57,7 +41,7 @@ std::unique_ptr TextBlock::deserialize(std::istream& is) { uint32_t wc, xc, sc; std::list words; std::list wordXpos; - std::list wordStyles; + std::list wordStyles; BLOCK_STYLE style; // words diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index b5a4921..4b2b031 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -1,4 +1,6 @@ #pragma once +#include + #include #include #include @@ -8,11 +10,6 @@ // represents a block of words in the html document class TextBlock final : public Block { public: - enum SPAN_STYLE : uint8_t { - BOLD_SPAN = 1, - ITALIC_SPAN = 2, - }; - enum BLOCK_STYLE : uint8_t { JUSTIFIED = 0, LEFT_ALIGN = 1, @@ -23,19 +20,14 @@ class TextBlock final : public Block { private: std::list words; std::list wordXpos; - std::list wordStyles; - - // the style of the block - left, center, right aligned + std::list wordStyles; BLOCK_STYLE style; public: - explicit TextBlock(const BLOCK_STYLE style) : style(style) {} - explicit TextBlock(std::list words, std::list word_xpos, - // the styles of each word - std::list word_styles, const BLOCK_STYLE style) + explicit TextBlock(std::list words, std::list word_xpos, std::list word_styles, + const BLOCK_STYLE style) : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {} ~TextBlock() override = default; - void addWord(std::string word, bool is_bold, bool is_italic); void setStyle(const BLOCK_STYLE style) { this->style = style; } BLOCK_STYLE getStyle() const { return style; } bool isEmpty() override { return words.empty(); } diff --git a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h index a927bbb..7482cb9 100644 --- a/lib/GfxRenderer/GfxRenderer.h +++ b/lib/GfxRenderer/GfxRenderer.h @@ -1,11 +1,10 @@ #pragma once #include +#include #include -#include "EpdFontFamily.h" - class GfxRenderer { public: enum FontRenderMode { BW, GRAYSCALE_LSB, GRAYSCALE_MSB };