Build out lines when parsing html and holding >750 words in buffer (#73)

## Summary

* Build out lines for pages when more than 750 words are held in the buffer
* Should fix memory crashes caused by parsing long blocks of text
This commit is contained in:
Dave Allie 2025-12-21 13:43:19 +11:00 committed by GitHub
parent 9a3bb81337
commit 299623927e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 96 additions and 66 deletions

View File

@ -19,14 +19,25 @@ void ParsedText::addWord(std::string word, const EpdFontStyle fontStyle) {
// Consumes data to minimize memory usage // Consumes data to minimize memory usage
void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const int horizontalMargin, void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fontId, const int horizontalMargin,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine) { const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
const bool includeLastLine) {
if (words.empty()) { if (words.empty()) {
return; return;
} }
const size_t totalWordCount = words.size();
const int pageWidth = renderer.getScreenWidth() - horizontalMargin; const int pageWidth = renderer.getScreenWidth() - horizontalMargin;
const int spaceWidth = renderer.getSpaceWidth(fontId); const int spaceWidth = renderer.getSpaceWidth(fontId);
const auto wordWidths = calculateWordWidths(renderer, fontId);
const auto lineBreakIndices = computeLineBreaks(pageWidth, spaceWidth, wordWidths);
const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;
for (size_t i = 0; i < lineCount; ++i) {
extractLine(i, pageWidth, spaceWidth, wordWidths, lineBreakIndices, processLine);
}
}
std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& renderer, const int fontId) {
const size_t totalWordCount = words.size();
std::vector<uint16_t> wordWidths; std::vector<uint16_t> wordWidths;
wordWidths.reserve(totalWordCount); wordWidths.reserve(totalWordCount);
@ -47,6 +58,13 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
std::advance(wordStylesIt, 1); std::advance(wordStylesIt, 1);
} }
return wordWidths;
}
std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int spaceWidth,
const std::vector<uint16_t>& wordWidths) const {
const size_t totalWordCount = words.size();
// DP table to store the minimum badness (cost) of lines starting at index i // DP table to store the minimum badness (cost) of lines starting at index i
std::vector<int> dp(totalWordCount); std::vector<int> dp(totalWordCount);
// 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i' // 'ans[i]' stores the index 'j' of the *last word* in the optimal line starting at 'i'
@ -106,66 +124,59 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
currentWordIndex = nextBreakIndex; currentWordIndex = nextBreakIndex;
} }
// Initialize iterators for consumption return lineBreakIndices;
auto wordStartIt = words.begin(); }
auto wordStyleStartIt = wordStyles.begin();
size_t wordWidthIndex = 0; void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
const std::vector<uint16_t>& wordWidths, const std::vector<size_t>& lineBreakIndices,
size_t lastBreakAt = 0; const std::function<void(std::shared_ptr<TextBlock>)>& processLine) {
for (const size_t lineBreak : lineBreakIndices) { const size_t lineBreak = lineBreakIndices[breakIndex];
const size_t lineWordCount = lineBreak - lastBreakAt; const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
const size_t lineWordCount = lineBreak - lastBreakAt;
// Calculate end iterators for the range to splice
auto wordEndIt = wordStartIt; // Calculate total word width for this line
auto wordStyleEndIt = wordStyleStartIt; int lineWordWidthSum = 0;
std::advance(wordEndIt, lineWordCount); for (size_t i = lastBreakAt; i < lineBreak; i++) {
std::advance(wordStyleEndIt, lineWordCount); lineWordWidthSum += wordWidths[i];
}
// Calculate total word width for this line
int lineWordWidthSum = 0; // Calculate spacing
for (size_t i = 0; i < lineWordCount; ++i) { const int spareSpace = pageWidth - lineWordWidthSum;
lineWordWidthSum += wordWidths[wordWidthIndex + i];
} int spacing = spaceWidth;
const bool isLastLine = lineBreak == words.size();
// Calculate spacing
int spareSpace = pageWidth - lineWordWidthSum; if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
spacing = spareSpace / (lineWordCount - 1);
int spacing = spaceWidth; }
const bool isLastLine = lineBreak == totalWordCount;
// Calculate initial x position
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { uint16_t xpos = 0;
spacing = spareSpace / (lineWordCount - 1); if (style == TextBlock::RIGHT_ALIGN) {
} xpos = spareSpace - (lineWordCount - 1) * spaceWidth;
} else if (style == TextBlock::CENTER_ALIGN) {
// Calculate initial x position xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2;
uint16_t xpos = 0; }
if (style == TextBlock::RIGHT_ALIGN) {
xpos = spareSpace - (lineWordCount - 1) * spaceWidth; // Pre-calculate X positions for words
} else if (style == TextBlock::CENTER_ALIGN) { std::list<uint16_t> lineXPos;
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; for (size_t i = lastBreakAt; i < lineBreak; i++) {
} const uint16_t currentWordWidth = wordWidths[i];
lineXPos.push_back(xpos);
// Pre-calculate X positions for words xpos += currentWordWidth + spacing;
std::list<uint16_t> lineXPos; }
for (size_t i = 0; i < lineWordCount; ++i) {
const uint16_t currentWordWidth = wordWidths[wordWidthIndex + i]; // Iterators always start at the beginning as we are moving content with splice below
lineXPos.push_back(xpos); auto wordEndIt = words.begin();
xpos += currentWordWidth + spacing; auto wordStyleEndIt = wordStyles.begin();
} std::advance(wordEndIt, lineWordCount);
std::advance(wordStyleEndIt, lineWordCount);
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
std::list<std::string> lineWords; // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
lineWords.splice(lineWords.begin(), words, wordStartIt, wordEndIt); std::list<std::string> lineWords;
std::list<EpdFontStyle> lineWordStyles; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyleStartIt, wordStyleEndIt); std::list<EpdFontStyle> lineWordStyles;
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
processLine(
std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
// Update pointers/indices for the next line
wordStartIt = wordEndIt;
wordStyleStartIt = wordStyleEndIt;
wordWidthIndex += lineWordCount;
lastBreakAt = lineBreak;
}
} }

View File

@ -2,11 +2,11 @@
#include <EpdFontFamily.h> #include <EpdFontFamily.h>
#include <cstdint>
#include <functional> #include <functional>
#include <list> #include <list>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector>
#include "blocks/TextBlock.h" #include "blocks/TextBlock.h"
@ -18,6 +18,12 @@ class ParsedText {
TextBlock::BLOCK_STYLE style; TextBlock::BLOCK_STYLE style;
bool extraParagraphSpacing; bool extraParagraphSpacing;
std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths) const;
void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
const std::vector<size_t>& lineBreakIndices,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId);
public: public:
explicit ParsedText(const TextBlock::BLOCK_STYLE style, const bool extraParagraphSpacing) explicit ParsedText(const TextBlock::BLOCK_STYLE style, const bool extraParagraphSpacing)
: style(style), extraParagraphSpacing(extraParagraphSpacing) {} : style(style), extraParagraphSpacing(extraParagraphSpacing) {}
@ -26,7 +32,9 @@ class ParsedText {
void addWord(std::string word, EpdFontStyle fontStyle); void addWord(std::string word, EpdFontStyle fontStyle);
void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; } void setStyle(const TextBlock::BLOCK_STYLE style) { this->style = style; }
TextBlock::BLOCK_STYLE getStyle() const { return style; } TextBlock::BLOCK_STYLE getStyle() const { return style; }
size_t size() const { return words.size(); }
bool isEmpty() const { return words.empty(); } bool isEmpty() const { return words.empty(); }
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin, void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, int horizontalMargin,
const std::function<void(std::shared_ptr<TextBlock>)>& processLine); const std::function<void(std::shared_ptr<TextBlock>)>& processLine,
bool includeLastLine = true);
}; };

View File

@ -143,6 +143,17 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
self->partWordBuffer[self->partWordBufferIndex++] = s[i]; self->partWordBuffer[self->partWordBufferIndex++] = s[i];
} }
// If more than 750 words are buffered, perform the layout and consume every line except the last.
// That should be enough text to build out 1-2 full pages, and doing this frees up a lot of
// memory.
// Spotted when reading Intermezzo, which contains some really long text blocks.
if (self->currentTextBlock->size() > 750) {
Serial.printf("[%lu] [EHP] Text block too long, splitting into multiple pages\n", millis());
self->currentTextBlock->layoutAndExtractLines(
self->renderer, self->fontId, self->marginLeft + self->marginRight,
[self](const std::shared_ptr<TextBlock>& textBlock) { self->addLineToPage(textBlock); }, false);
}
} }
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {