Consolidate chapter page data into single file

Has ~80% speedup to page generation (indexing) time, and makes cache cleanup a lot faster
This commit is contained in:
Dave Allie 2025-12-29 12:26:38 +11:00
parent b1763821b5
commit ea3584a918
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F
4 changed files with 92 additions and 78 deletions

View File

@ -3,10 +3,6 @@
#include <HardwareSerial.h>
#include <Serialization.h>
namespace {
constexpr uint8_t PAGE_FILE_VERSION = 3;
}
void PageLine::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) {
block->render(renderer, fontId, xPos + xOffset, yPos + yOffset);
}
@ -36,8 +32,6 @@ void Page::render(GfxRenderer& renderer, const int fontId, const int xOffset, co
}
void Page::serialize(File& file) const {
serialization::writePod(file, PAGE_FILE_VERSION);
const uint32_t count = elements.size();
serialization::writePod(file, count);
@ -49,13 +43,6 @@ void Page::serialize(File& file) const {
}
std::unique_ptr<Page> Page::deserialize(File& file) {
uint8_t version;
serialization::readPod(file, version);
if (version != PAGE_FILE_VERSION) {
Serial.printf("[%lu] [PGE] Deserialization failed: Unknown version %u\n", millis(), version);
return nullptr;
}
auto page = std::unique_ptr<Page>(new Page());
uint32_t count;

View File

@ -8,54 +8,57 @@
#include "parsers/ChapterHtmlSlimParser.h"
namespace {
constexpr uint8_t SECTION_FILE_VERSION = 6;
constexpr uint8_t SECTION_FILE_VERSION = 7;
constexpr size_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(int) +
sizeof(int) + sizeof(int) + sizeof(size_t);
constexpr char sectionFileName[] = "/section.bin";
} // namespace
void Section::onPageComplete(std::unique_ptr<Page> page) {
const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin";
File outputFile;
if (!FsHelpers::openFileForWrite("SCT", filePath, outputFile)) {
return;
size_t Section::onPageComplete(std::unique_ptr<Page> page) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount);
return 0;
}
page->serialize(outputFile);
outputFile.close();
const auto position = file.position();
page->serialize(file);
Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount);
pageCount++;
return position;
}
void Section::writeCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) const {
File outputFile;
if (!FsHelpers::openFileForWrite("SCT", cachePath + "/section.bin", outputFile)) {
void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!file) {
Serial.printf("[%lu] [SCT] File not open for writing header\n", millis());
return;
}
serialization::writePod(outputFile, SECTION_FILE_VERSION);
serialization::writePod(outputFile, fontId);
serialization::writePod(outputFile, lineCompression);
serialization::writePod(outputFile, extraParagraphSpacing);
serialization::writePod(outputFile, viewportWidth);
serialization::writePod(outputFile, viewportHeight);
serialization::writePod(outputFile, pageCount);
outputFile.close();
static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) +
sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) +
sizeof(pageCount) + sizeof(size_t),
"Header size mismatch");
serialization::writePod(file, SECTION_FILE_VERSION);
serialization::writePod(file, fontId);
serialization::writePod(file, lineCompression);
serialization::writePod(file, extraParagraphSpacing);
serialization::writePod(file, viewportWidth);
serialization::writePod(file, viewportHeight);
serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written)
serialization::writePod(file, static_cast<size_t>(0)); // Placeholder for LUT offset
}
bool Section::loadCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
const auto sectionFilePath = cachePath + "/section.bin";
File inputFile;
if (!FsHelpers::openFileForRead("SCT", sectionFilePath, inputFile)) {
bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight) {
if (!FsHelpers::openFileForRead("SCT", cachePath + sectionFileName, file)) {
return false;
}
// Match parameters
{
uint8_t version;
serialization::readPod(inputFile, version);
serialization::readPod(file, version);
if (version != SECTION_FILE_VERSION) {
inputFile.close();
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Unknown version %u\n", millis(), version);
clearCache();
return false;
@ -64,24 +67,24 @@ bool Section::loadCacheMetadata(const int fontId, const float lineCompression, c
int fileFontId, fileViewportWidth, fileViewportHeight;
float fileLineCompression;
bool fileExtraParagraphSpacing;
serialization::readPod(inputFile, fileFontId);
serialization::readPod(inputFile, fileLineCompression);
serialization::readPod(inputFile, fileExtraParagraphSpacing);
serialization::readPod(inputFile, fileViewportWidth);
serialization::readPod(inputFile, fileViewportHeight);
serialization::readPod(file, fileFontId);
serialization::readPod(file, fileLineCompression);
serialization::readPod(file, fileExtraParagraphSpacing);
serialization::readPod(file, fileViewportWidth);
serialization::readPod(file, fileViewportHeight);
if (fontId != fileFontId || lineCompression != fileLineCompression ||
extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth ||
viewportHeight != fileViewportHeight) {
inputFile.close();
file.close();
Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis());
clearCache();
return false;
}
}
serialization::readPod(inputFile, pageCount);
inputFile.close();
serialization::readPod(file, pageCount);
file.close();
Serial.printf("[%lu] [SCT] Deserialization succeeded: %d pages\n", millis(), pageCount);
return true;
}
@ -107,10 +110,10 @@ bool Section::clearCache() const {
return true;
}
bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing,
const int viewportWidth, const int viewportHeight,
const std::function<void()>& progressSetupFn,
const std::function<void(int)>& progressFn) {
constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB
const auto localPath = epub->getSpineItem(spineIndex).href;
const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html";
@ -156,30 +159,53 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
progressSetupFn();
}
if (!FsHelpers::openFileForWrite("SCT", cachePath + sectionFileName, file)) {
return false;
}
writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
std::vector<size_t> lut = {};
ChapterHtmlSlimParser visitor(
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight,
[this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); }, progressFn);
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
progressFn);
success = visitor.parseAndBuildPages();
SD.remove(tmpHtmlPath.c_str());
if (!success) {
Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis());
file.close();
SD.remove((cachePath + sectionFileName).c_str());
return false;
}
writeCacheMetadata(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight);
const auto lutOffset = file.position();
// Write LUT
for (const auto& pos : lut) {
serialization::writePod(file, pos);
}
// Go back and write LUT offset
file.seek(HEADER_SIZE - sizeof(size_t) - sizeof(pageCount));
serialization::writePod(file, pageCount);
serialization::writePod(file, lutOffset);
file.close();
return true;
}
std::unique_ptr<Page> Section::loadPageFromSD() const {
const auto filePath = cachePath + "/page_" + std::to_string(currentPage) + ".bin";
File inputFile;
if (!FsHelpers::openFileForRead("SCT", filePath, inputFile)) {
std::unique_ptr<Page> Section::loadPageFromSectionFile() {
if (!FsHelpers::openFileForRead("SCT", cachePath + sectionFileName, file)) {
return nullptr;
}
auto page = Page::deserialize(inputFile);
inputFile.close();
file.seek(HEADER_SIZE - sizeof(size_t));
size_t lutOffset;
serialization::readPod(file, lutOffset);
file.seek(lutOffset + sizeof(size_t) * currentPage);
size_t pagePos;
serialization::readPod(file, pagePos);
file.seek(pagePos);
auto page = Page::deserialize(file);
file.close();
return page;
}

View File

@ -12,10 +12,11 @@ class Section {
const int spineIndex;
GfxRenderer& renderer;
std::string cachePath;
File file;
void writeCacheMetadata(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight) const;
void onPageComplete(std::unique_ptr<Page> page);
void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight);
size_t onPageComplete(std::unique_ptr<Page> page);
public:
int pageCount = 0;
@ -27,12 +28,12 @@ class Section {
renderer(renderer),
cachePath(epub->getCachePath() + "/" + std::to_string(spineIndex)) {}
~Section() = default;
bool loadCacheMetadata(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight);
bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight);
void setupCacheDir() const;
bool clearCache() const;
bool persistPageDataToSD(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight, const std::function<void()>& progressSetupFn = nullptr,
const std::function<void(int)>& progressFn = nullptr);
std::unique_ptr<Page> loadPageFromSD() const;
bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth,
int viewportHeight, const std::function<void()>& progressSetupFn = nullptr,
const std::function<void(int)>& progressFn = nullptr);
std::unique_ptr<Page> loadPageFromSectionFile();
};

View File

@ -254,8 +254,8 @@ void EpubReaderActivity::renderScreen() {
const auto viewportWidth = renderer.getScreenWidth() - orientedMarginLeft - orientedMarginRight;
const auto viewportHeight = renderer.getScreenHeight() - orientedMarginTop - orientedMarginBottom;
if (!section->loadCacheMetadata(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth,
viewportHeight)) {
if (!section->loadSectionFile(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth,
viewportHeight)) {
Serial.printf("[%lu] [ERS] Cache not found, building...\n", millis());
// Progress bar dimensions
@ -300,8 +300,8 @@ void EpubReaderActivity::renderScreen() {
renderer.displayBuffer(EInkDisplay::FAST_REFRESH);
};
if (!section->persistPageDataToSD(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth,
viewportHeight, progressSetup, progressCallback)) {
if (!section->createSectionFile(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth,
viewportHeight, progressSetup, progressCallback)) {
Serial.printf("[%lu] [ERS] Failed to persist page data to SD\n", millis());
section.reset();
return;
@ -336,7 +336,7 @@ void EpubReaderActivity::renderScreen() {
}
{
auto p = section->loadPageFromSD();
auto p = section->loadPageFromSectionFile();
if (!p) {
Serial.printf("[%lu] [ERS] Failed to load page from SD - clearing section cache\n", millis());
section->clearCache();