From 4c3b4f42adf6686faf2693e67c4fa93a72680429 Mon Sep 17 00:00:00 2001
From: Dave Allie
Date: Mon, 29 Dec 2025 16:06:00 +1100
Subject: [PATCH] Cleanup and TODO

---
 lib/Epub/Epub/BookMetadataCache.cpp        |  5 +++++
 lib/Epub/Epub/parsers/ContentOpfParser.cpp |  2 ++
 lib/ZipFile/ZipFile.cpp                    | 16 +++++++++++-----
 lib/ZipFile/ZipFile.h                      |  4 ++--
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp
index 2231e18d..e89cce93 100644
--- a/lib/Epub/Epub/BookMetadataCache.cpp
+++ b/lib/Epub/Epub/BookMetadataCache.cpp
@@ -131,6 +131,9 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
     tocFile.close();
     return false;
   }
+  // TODO: For large ZIPs, loading all the localHeaderOffsets will crash.
+  // However, not having them loaded is extremely slow. Need a better solution here.
+  // Perhaps cache only the spine items, or find a better way to speed up lookups?
   if (!zip.loadAllLocalHeaderOffsets()) {
     Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis());
     bookFile.close();
@@ -241,6 +244,8 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri
   int spineIndex = -1;

   // find spine index
+  // TODO: This lookup is slow, as we need to scan through all items each time. We can't hold it all
+  // in memory due to size, but perhaps we could load just the hrefs into a vector/list for an index lookup?
   spineFile.seek(0);
   for (int i = 0; i < spineCount; i++) {
     auto spineEntry = readSpineEntry(spineFile);
diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp
index 801f5c24..a62b2d0a 100644
--- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp
+++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp
@@ -182,6 +182,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
       if (strcmp(atts[i], "idref") == 0) {
        const std::string idref = atts[i + 1];
         // Resolve the idref to href using items map
+        // TODO: This lookup is slow, as we need to scan through all items each time.
+        // It can take up to 200ms per item once we reach around 1500 items.
        self->tempItemStore.seek(0);
         std::string itemId;
         std::string href;
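The createTocEntry TODO suggests keeping just the hrefs in a vector so the spine index can be resolved without rescanning spineFile, and the idref lookup above has the same shape. A minimal sketch of that idea, with hypothetical names (SpineHrefIndex is not part of this patch):

```cpp
#include <string>
#include <vector>

// Hypothetical sketch: hold only the spine hrefs in RAM and resolve
// href -> spine index with an in-memory scan instead of re-reading
// spineFile for every ToC entry.
class SpineHrefIndex {
 public:
  // Called once per spine entry while the spine file is being written.
  void add(const std::string& href) { hrefs_.push_back(href); }

  // Returns the spine index for `href`, or -1 if it is not in the spine.
  // A linear scan over in-memory strings is cheap next to a file rescan.
  int indexOf(const std::string& href) const {
    for (size_t i = 0; i < hrefs_.size(); i++) {
      if (hrefs_[i] == href) return static_cast<int>(i);
    }
    return -1;
  }

 private:
  std::vector<std::string> hrefs_;
};
```

Whether this fits depends on the book: 1500 hrefs at roughly 40 bytes each is on the order of 60 KB, which may still be too much on a small heap, hence the TODO's hedging.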
diff --git a/lib/ZipFile/ZipFile.cpp b/lib/ZipFile/ZipFile.cpp
index 48ff6575..b26067b2 100644
--- a/lib/ZipFile/ZipFile.cpp
+++ b/lib/ZipFile/ZipFile.cpp
@@ -47,6 +47,9 @@ bool ZipFile::loadAllLocalHeaderOffsets() {
   uint32_t sig;
   char itemName[256];

+  localHeaderOffsets.clear();
+  localHeaderOffsets.reserve(zipDetails.totalEntries);
+
   while (file.available()) {
     file.read(reinterpret_cast<uint8_t*>(&sig), 4);
     if (sig != 0x02014b50) break;  // End of list
@@ -77,11 +80,14 @@ bool ZipFile::loadAllLocalHeaderOffsets() {
 }

 bool ZipFile::loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset) {
-  if (localHeaderOffsets.count(filename) > 0) {
-    *localHeaderOffset = localHeaderOffsets.at(filename);
-    Serial.printf("[%lu] [ZIP] Found cached local header offset for file: %s (LHO: %lu)\n", millis(), filename,
-                  static_cast<unsigned long>(*localHeaderOffset));
-    return true;
+  // If we have saved any offsets, assume they're all loaded
+  if (!localHeaderOffsets.empty()) {
+    if (localHeaderOffsets.count(filename) > 0) {
+      *localHeaderOffset = localHeaderOffsets.at(filename);
+      return true;
+    }
+
+    return false;
   }

   const bool wasOpen = isOpen();
diff --git a/lib/ZipFile/ZipFile.h b/lib/ZipFile/ZipFile.h
index 5d5dd4dc..bc86ea85 100644
--- a/lib/ZipFile/ZipFile.h
+++ b/lib/ZipFile/ZipFile.h
@@ -1,8 +1,8 @@
 #pragma once

 #include <FS.h>
-#include <map>
 #include <string>
+#include <unordered_map>

 class ZipFile {
  public:
@@ -23,7 +23,7 @@ class ZipFile {
   const std::string& filePath;
   File file;
   ZipDetails zipDetails = {0, 0, false};
-  std::map<std::string, uint32_t> localHeaderOffsets;
+  std::unordered_map<std::string, uint32_t> localHeaderOffsets;

   bool loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset);
   bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat);
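The first TODO in BookMetadataCache.cpp floats caching offsets for only the spine items, so large ZIPs don't need a localHeaderOffsets entry per archive member. A rough sketch of that option, with hypothetical names (SpineOffsetCache is not part of this patch): the central-directory scan stays the same, but only whitelisted names are recorded, bounding the map by the spine size rather than zipDetails.totalEntries.

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Hypothetical sketch of the "cache only spine items" idea: offsets are
// recorded only for a caller-supplied set of hrefs, so RAM use is bounded
// by the spine size rather than the ZIP's total entry count.
class SpineOffsetCache {
 public:
  explicit SpineOffsetCache(const std::vector<std::string>& spineHrefs)
      : wanted_(spineHrefs.begin(), spineHrefs.end()) {
    offsets_.reserve(wanted_.size());
  }

  // Called once per central-directory entry during the initial scan.
  void maybeAdd(const std::string& name, uint32_t localHeaderOffset) {
    if (wanted_.count(name) > 0) offsets_[name] = localHeaderOffset;
  }

  // Returns true and sets `localHeaderOffset` if the entry was cached;
  // anything else falls back to the slow on-disk central-directory search.
  bool lookup(const std::string& name, uint32_t* localHeaderOffset) const {
    auto it = offsets_.find(name);
    if (it == offsets_.end()) return false;
    *localHeaderOffset = it->second;
    return true;
  }

 private:
  std::unordered_set<std::string> wanted_;
  std::unordered_map<std::string, uint32_t> offsets_;
};
```

This keeps the fast path for the files that are actually read repeatedly while leaving rarely-touched entries (images, fonts) to the existing slow lookup, matching the trade-off the TODO describes.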