diff --git a/README.md b/README.md index 60a2ed2e..6f484f41 100644 --- a/README.md +++ b/README.md @@ -98,9 +98,9 @@ CrossPoint Reader is pretty aggressive about caching data down to the SD card to has ~380KB of usable RAM, so we have to be careful. A lot of the decisions made in the design of the firmware were based on this constraint. -### EPUB caching +### Data caching -The first time chapters of an EPUB are loaded, they are cached to the SD card. Subsequent loads are served from the +The first time chapters of a book are loaded, they are cached to the SD card. Subsequent loads are served from the cache. This cache directory exists at `.crosspoint` on the SD card. The structure is as follows: @@ -108,25 +108,22 @@ cache. This cache directory exists at `.crosspoint` on the SD card. The structur .crosspoint/ ├── epub_12471232/ # Each EPUB is cached to a subdirectory named `epub_` │ ├── progress.bin # Stores reading progress (chapter, page, etc.) -│ ├── 0/ # Each chapter is stored in a subdirectory named by its index (based on the spine order) -│ │ ├── section.bin # Section metadata (page count) -│ │ ├── page_0.bin # Each page is stored in a separate file, it -│ │ ├── page_1.bin # contains the position (x, y) and text for each word -│ │ └── ... -│ ├── 1/ -│ │ ├── section.bin -│ │ ├── page_0.bin -│ │ ├── page_1.bin -│ │ └── ... -│ └── ... +│ ├── cover.bmp # Book cover image (once generated) +│ ├── book.bin # Book metadata (title, author, spine, table of contents, etc.) +│ └── sections/ # All chapter data is stored in the sections subdirectory +│ ├── 0.bin # Chapter data (screen count, all text layout info, etc.) +│ ├── 1.bin # files are named by their index in the spine +│ └── ... │ └── epub_189013891/ ``` -Deleting the `.crosspoint` directory will clear the cache. +Deleting the `.crosspoint` directory will clear the entire cache. 
-Due the way it's currently implemented, the cache is not automatically cleared when the EPUB is deleted and moving an -EPUB file will reset the reading progress. +Due to the way it's currently implemented, the cache is not automatically cleared when a book is deleted and moving a book +file will use a new cache directory, resetting the reading progress. + +For more details on the internal file structures, see the [file formats document](./docs/file-formats.md). ## Contributing diff --git a/docs/file-formats.md b/docs/file-formats.md new file mode 100644 index 00000000..fb096c88 --- /dev/null +++ b/docs/file-formats.md @@ -0,0 +1,9 @@ +# File Formats + +## `book.bin` + +![](./images/file-formats/book-bin.png) + +## `section.bin` + +![](./images/file-formats/section-bin.png) diff --git a/docs/images/file-formats/book-bin.png b/docs/images/file-formats/book-bin.png new file mode 100644 index 00000000..07d9c2eb Binary files /dev/null and b/docs/images/file-formats/book-bin.png differ diff --git a/docs/images/file-formats/section-bin.png b/docs/images/file-formats/section-bin.png new file mode 100644 index 00000000..9a9691c6 Binary files /dev/null and b/docs/images/file-formats/section-bin.png differ diff --git a/lib/Epub/Epub/Page.cpp b/lib/Epub/Epub/Page.cpp index 15e50d08..c50fe305 100644 --- a/lib/Epub/Epub/Page.cpp +++ b/lib/Epub/Epub/Page.cpp @@ -3,20 +3,16 @@ #include #include -namespace { -constexpr uint8_t PAGE_FILE_VERSION = 3; -} - void PageLine::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) { block->render(renderer, fontId, xPos + xOffset, yPos + yOffset); } -void PageLine::serialize(File& file) { +bool PageLine::serialize(File& file) { serialization::writePod(file, xPos); serialization::writePod(file, yPos); // serialize TextBlock pointed to by PageLine - block->serialize(file); + return block->serialize(file); } std::unique_ptr PageLine::deserialize(File& file) { @@ -35,27 +31,22 @@ void Page::render(GfxRenderer& 
renderer, const int fontId, const int xOffset, co } } -void Page::serialize(File& file) const { - serialization::writePod(file, PAGE_FILE_VERSION); - +bool Page::serialize(File& file) const { const uint32_t count = elements.size(); serialization::writePod(file, count); for (const auto& el : elements) { // Only PageLine exists currently serialization::writePod(file, static_cast(TAG_PageLine)); - el->serialize(file); + if (!el->serialize(file)) { + return false; + } } + + return true; } std::unique_ptr Page::deserialize(File& file) { - uint8_t version; - serialization::readPod(file, version); - if (version != PAGE_FILE_VERSION) { - Serial.printf("[%lu] [PGE] Deserialization failed: Unknown version %u\n", millis(), version); - return nullptr; - } - auto page = std::unique_ptr(new Page()); uint32_t count; diff --git a/lib/Epub/Epub/Page.h b/lib/Epub/Epub/Page.h index f43e4987..841ef6b9 100644 --- a/lib/Epub/Epub/Page.h +++ b/lib/Epub/Epub/Page.h @@ -18,7 +18,7 @@ class PageElement { explicit PageElement(const int16_t xPos, const int16_t yPos) : xPos(xPos), yPos(yPos) {} virtual ~PageElement() = default; virtual void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) = 0; - virtual void serialize(File& file) = 0; + virtual bool serialize(File& file) = 0; }; // a line from a block element @@ -29,7 +29,7 @@ class PageLine final : public PageElement { PageLine(std::shared_ptr block, const int16_t xPos, const int16_t yPos) : PageElement(xPos, yPos), block(std::move(block)) {} void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override; - void serialize(File& file) override; + bool serialize(File& file) override; static std::unique_ptr deserialize(File& file); }; @@ -38,6 +38,6 @@ class Page { // the list of block index and line numbers on this page std::vector> elements; void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) const; - void serialize(File& file) const; + bool serialize(File& file) const; static 
std::unique_ptr deserialize(File& file); }; diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 7b815792..9cc19ea8 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -8,54 +8,60 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 6; +constexpr uint8_t SECTION_FILE_VERSION = 7; +constexpr size_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(int) + + sizeof(int) + sizeof(int) + sizeof(size_t); } // namespace -void Section::onPageComplete(std::unique_ptr page) { - const auto filePath = cachePath + "/page_" + std::to_string(pageCount) + ".bin"; - - File outputFile; - if (!FsHelpers::openFileForWrite("SCT", filePath, outputFile)) { - return; +size_t Section::onPageComplete(std::unique_ptr page) { + if (!file) { + Serial.printf("[%lu] [SCT] File not open for writing page %d\n", millis(), pageCount); + return 0; } - page->serialize(outputFile); - outputFile.close(); + const auto position = file.position(); + if (!page->serialize(file)) { + Serial.printf("[%lu] [SCT] Failed to serialize page %d\n", millis(), pageCount); + return 0; + } Serial.printf("[%lu] [SCT] Page %d processed\n", millis(), pageCount); pageCount++; + return position; } -void Section::writeCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing, - const int viewportWidth, const int viewportHeight) const { - File outputFile; - if (!FsHelpers::openFileForWrite("SCT", cachePath + "/section.bin", outputFile)) { +void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing, + const int viewportWidth, const int viewportHeight) { + if (!file) { + Serial.printf("[%lu] [SCT] File not open for writing header\n", millis()); return; } - serialization::writePod(outputFile, SECTION_FILE_VERSION); - serialization::writePod(outputFile, fontId); - serialization::writePod(outputFile, lineCompression); 
- serialization::writePod(outputFile, extraParagraphSpacing); - serialization::writePod(outputFile, viewportWidth); - serialization::writePod(outputFile, viewportHeight); - serialization::writePod(outputFile, pageCount); - outputFile.close(); + static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) + + sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) + + sizeof(pageCount) + sizeof(size_t), + "Header size mismatch"); + serialization::writePod(file, SECTION_FILE_VERSION); + serialization::writePod(file, fontId); + serialization::writePod(file, lineCompression); + serialization::writePod(file, extraParagraphSpacing); + serialization::writePod(file, viewportWidth); + serialization::writePod(file, viewportHeight); + serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written) + serialization::writePod(file, static_cast(0)); // Placeholder for LUT offset } -bool Section::loadCacheMetadata(const int fontId, const float lineCompression, const bool extraParagraphSpacing, - const int viewportWidth, const int viewportHeight) { - const auto sectionFilePath = cachePath + "/section.bin"; - File inputFile; - if (!FsHelpers::openFileForRead("SCT", sectionFilePath, inputFile)) { +bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing, + const int viewportWidth, const int viewportHeight) { + if (!FsHelpers::openFileForRead("SCT", filePath, file)) { return false; } // Match parameters { uint8_t version; - serialization::readPod(inputFile, version); + serialization::readPod(file, version); if (version != SECTION_FILE_VERSION) { - inputFile.close(); + file.close(); Serial.printf("[%lu] [SCT] Deserialization failed: Unknown version %u\n", millis(), version); clearCache(); return false; @@ -64,41 +70,36 @@ bool Section::loadCacheMetadata(const int fontId, const float lineCompression, c int fileFontId, 
fileViewportWidth, fileViewportHeight; float fileLineCompression; bool fileExtraParagraphSpacing; - serialization::readPod(inputFile, fileFontId); - serialization::readPod(inputFile, fileLineCompression); - serialization::readPod(inputFile, fileExtraParagraphSpacing); - serialization::readPod(inputFile, fileViewportWidth); - serialization::readPod(inputFile, fileViewportHeight); + serialization::readPod(file, fileFontId); + serialization::readPod(file, fileLineCompression); + serialization::readPod(file, fileExtraParagraphSpacing); + serialization::readPod(file, fileViewportWidth); + serialization::readPod(file, fileViewportHeight); if (fontId != fileFontId || lineCompression != fileLineCompression || extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight) { - inputFile.close(); + file.close(); Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis()); clearCache(); return false; } } - serialization::readPod(inputFile, pageCount); - inputFile.close(); + serialization::readPod(file, pageCount); + file.close(); Serial.printf("[%lu] [SCT] Deserialization succeeded: %d pages\n", millis(), pageCount); return true; } -void Section::setupCacheDir() const { - epub->setupCacheDir(); - SD.mkdir(cachePath.c_str()); -} - // Your updated class method (assuming you are using the 'SD' object, which is a wrapper for a specific filesystem) bool Section::clearCache() const { - if (!SD.exists(cachePath.c_str())) { + if (!SD.exists(filePath.c_str())) { Serial.printf("[%lu] [SCT] Cache does not exist, no action needed\n", millis()); return true; } - if (!FsHelpers::removeDir(cachePath.c_str())) { + if (!SD.remove(filePath.c_str())) { Serial.printf("[%lu] [SCT] Failed to clear cache\n", millis()); return false; } @@ -107,10 +108,10 @@ bool Section::clearCache() const { return true; } -bool Section::persistPageDataToSD(const int fontId, const float lineCompression, const bool 
extraParagraphSpacing, - const int viewportWidth, const int viewportHeight, - const std::function& progressSetupFn, - const std::function& progressFn) { +bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing, + const int viewportWidth, const int viewportHeight, + const std::function& progressSetupFn, + const std::function& progressFn) { constexpr size_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB const auto localPath = epub->getSpineItem(spineIndex).href; const auto tmpHtmlPath = epub->getCachePath() + "/.tmp_" + std::to_string(spineIndex) + ".html"; @@ -156,30 +157,66 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression, progressSetupFn(); } + if (!FsHelpers::openFileForWrite("SCT", filePath, file)) { + return false; + } + writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight); + std::vector lut = {}; + ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight, - [this](std::unique_ptr page) { this->onPageComplete(std::move(page)); }, progressFn); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, + progressFn); success = visitor.parseAndBuildPages(); SD.remove(tmpHtmlPath.c_str()); if (!success) { Serial.printf("[%lu] [SCT] Failed to parse XML and build pages\n", millis()); + file.close(); + SD.remove(filePath.c_str()); return false; } - writeCacheMetadata(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight); + const auto lutOffset = file.position(); + bool hasFailedLutRecords = false; + // Write LUT + for (const auto& pos : lut) { + if (pos == 0) { + hasFailedLutRecords = true; + break; + } + serialization::writePod(file, pos); + } + if (hasFailedLutRecords) { + Serial.printf("[%lu] [SCT] Failed to write LUT due to invalid page positions\n", millis()); + file.close(); + 
SD.remove(filePath.c_str()); + return false; + } + + // Go back and write LUT offset + file.seek(HEADER_SIZE - sizeof(size_t) - sizeof(pageCount)); + serialization::writePod(file, pageCount); + serialization::writePod(file, lutOffset); + file.close(); return true; } -std::unique_ptr Section::loadPageFromSD() const { - const auto filePath = cachePath + "/page_" + std::to_string(currentPage) + ".bin"; - - File inputFile; - if (!FsHelpers::openFileForRead("SCT", filePath, inputFile)) { +std::unique_ptr Section::loadPageFromSectionFile() { + if (!FsHelpers::openFileForRead("SCT", filePath, file)) { return nullptr; } - auto page = Page::deserialize(inputFile); - inputFile.close(); + + file.seek(HEADER_SIZE - sizeof(size_t)); + size_t lutOffset; + serialization::readPod(file, lutOffset); + file.seek(lutOffset + sizeof(size_t) * currentPage); + size_t pagePos; + serialization::readPod(file, pagePos); + file.seek(pagePos); + + auto page = Page::deserialize(file); + file.close(); return page; } diff --git a/lib/Epub/Epub/Section.h b/lib/Epub/Epub/Section.h index a1a62163..93e0d6c3 100644 --- a/lib/Epub/Epub/Section.h +++ b/lib/Epub/Epub/Section.h @@ -11,11 +11,12 @@ class Section { std::shared_ptr epub; const int spineIndex; GfxRenderer& renderer; - std::string cachePath; + std::string filePath; + File file; - void writeCacheMetadata(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth, - int viewportHeight) const; - void onPageComplete(std::unique_ptr page); + void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth, + int viewportHeight); + size_t onPageComplete(std::unique_ptr page); public: int pageCount = 0; @@ -25,14 +26,13 @@ class Section { : epub(epub), spineIndex(spineIndex), renderer(renderer), - cachePath(epub->getCachePath() + "/" + std::to_string(spineIndex)) {} + filePath(epub->getCachePath() + "/sections/" + std::to_string(spineIndex) + ".bin") {} ~Section() = default; - bool 
loadCacheMetadata(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth, - int viewportHeight); - void setupCacheDir() const; + bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth, + int viewportHeight); bool clearCache() const; - bool persistPageDataToSD(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth, - int viewportHeight, const std::function& progressSetupFn = nullptr, - const std::function& progressFn = nullptr); - std::unique_ptr loadPageFromSD() const; + bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, int viewportWidth, + int viewportHeight, const std::function& progressSetupFn = nullptr, + const std::function& progressFn = nullptr); + std::unique_ptr loadPageFromSectionFile(); }; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index ef6fdb5d..c20b37d3 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -24,34 +24,33 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int } } -void TextBlock::serialize(File& file) const { - // words - const uint32_t wc = words.size(); - serialization::writePod(file, wc); +bool TextBlock::serialize(File& file) const { + if (words.size() != wordXpos.size() || words.size() != wordStyles.size()) { + Serial.printf("[%lu] [TXB] Serialization failed: size mismatch (words=%u, xpos=%u, styles=%u)\n", millis(), + words.size(), wordXpos.size(), wordStyles.size()); + return false; + } + + // Word data + serialization::writePod(file, static_cast(words.size())); for (const auto& w : words) serialization::writeString(file, w); - - // wordXpos - const uint32_t xc = wordXpos.size(); - serialization::writePod(file, xc); for (auto x : wordXpos) serialization::writePod(file, x); - - // wordStyles - const uint32_t sc = wordStyles.size(); - serialization::writePod(file, sc); for (auto s : wordStyles) 
serialization::writePod(file, s); - // style + // Block style serialization::writePod(file, style); + + return true; } std::unique_ptr TextBlock::deserialize(File& file) { - uint32_t wc, xc, sc; + uint32_t wc; std::list words; std::list wordXpos; std::list wordStyles; BLOCK_STYLE style; - // words + // Word count serialization::readPod(file, wc); // Sanity check: prevent allocation of unreasonably large lists (max 10000 words per block) @@ -60,27 +59,15 @@ std::unique_ptr TextBlock::deserialize(File& file) { return nullptr; } + // Word data words.resize(wc); + wordXpos.resize(wc); + wordStyles.resize(wc); for (auto& w : words) serialization::readString(file, w); - - // wordXpos - serialization::readPod(file, xc); - wordXpos.resize(xc); for (auto& x : wordXpos) serialization::readPod(file, x); - - // wordStyles - serialization::readPod(file, sc); - wordStyles.resize(sc); for (auto& s : wordStyles) serialization::readPod(file, s); - // Validate data consistency: all three lists must have the same size - if (wc != xc || wc != sc) { - Serial.printf("[%lu] [TXB] Deserialization failed: size mismatch (words=%u, xpos=%u, styles=%u)\n", millis(), wc, - xc, sc); - return nullptr; - } - - // style + // Block style serialization::readPod(file, style); return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style)); diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 46e320e3..9dfde60f 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -36,6 +36,6 @@ class TextBlock final : public Block { // given a renderer works out where to break the words into lines void render(const GfxRenderer& renderer, int fontId, int x, int y) const; BlockType getType() override { return TEXT_BLOCK; } - void serialize(File& file) const; + bool serialize(File& file) const; static std::unique_ptr deserialize(File& file); }; diff --git a/src/activities/reader/EpubReaderActivity.cpp 
b/src/activities/reader/EpubReaderActivity.cpp index 3e194149..ebf5b0a7 100644 --- a/src/activities/reader/EpubReaderActivity.cpp +++ b/src/activities/reader/EpubReaderActivity.cpp @@ -254,8 +254,8 @@ void EpubReaderActivity::renderScreen() { const auto viewportWidth = renderer.getScreenWidth() - orientedMarginLeft - orientedMarginRight; const auto viewportHeight = renderer.getScreenHeight() - orientedMarginTop - orientedMarginBottom; - if (!section->loadCacheMetadata(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth, - viewportHeight)) { + if (!section->loadSectionFile(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth, + viewportHeight)) { Serial.printf("[%lu] [ERS] Cache not found, building...\n", millis()); // Progress bar dimensions @@ -282,8 +282,6 @@ void EpubReaderActivity::renderScreen() { pagesUntilFullRefresh = 0; } - section->setupCacheDir(); - // Setup callback - only called for chapters >= 50KB, redraws with progress bar auto progressSetup = [this, boxXWithBar, boxWidthWithBar, boxHeightWithBar, barX, barY] { renderer.fillRect(boxXWithBar, boxY, boxWidthWithBar, boxHeightWithBar, false); @@ -300,8 +298,8 @@ void EpubReaderActivity::renderScreen() { renderer.displayBuffer(EInkDisplay::FAST_REFRESH); }; - if (!section->persistPageDataToSD(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth, - viewportHeight, progressSetup, progressCallback)) { + if (!section->createSectionFile(READER_FONT_ID, lineCompression, SETTINGS.extraParagraphSpacing, viewportWidth, + viewportHeight, progressSetup, progressCallback)) { Serial.printf("[%lu] [ERS] Failed to persist page data to SD\n", millis()); section.reset(); return; @@ -336,7 +334,7 @@ void EpubReaderActivity::renderScreen() { } { - auto p = section->loadPageFromSD(); + auto p = section->loadPageFromSectionFile(); if (!p) { Serial.printf("[%lu] [ERS] Failed to load page from SD - clearing section cache\n", millis()); 
section->clearCache();