From 071ccb9d1bd95d213c2695e1608d3800d8383697 Mon Sep 17 00:00:00 2001 From: Dave Allie Date: Mon, 29 Dec 2025 20:17:29 +1000 Subject: [PATCH] Custom zip parsing (#140) ## Summary * Use custom zip central directory parsing to lower memory usage when loading zipped epub content --- lib/Epub/Epub.cpp | 17 +- lib/Epub/Epub.h | 3 +- lib/Epub/Epub/BookMetadataCache.cpp | 25 +- lib/Epub/Epub/parsers/ContentOpfParser.cpp | 3 +- lib/FsHelpers/FsHelpers.cpp | 1 + lib/ZipFile/ZipFile.cpp | 366 ++++++++++++++++----- lib/ZipFile/ZipFile.h | 50 ++- 7 files changed, 358 insertions(+), 107 deletions(-) diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index b48d7ea3..941e11ba 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -60,9 +60,6 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { } ContentOpfParser opfParser(getCachePath(), getBasePath(), contentOpfSize, bookMetadataCache.get()); - Serial.printf("[%lu] [MEM] Free: %d bytes, Total: %d bytes, Min Free: %d bytes\n", millis(), ESP.getFreeHeap(), - ESP.getHeapSize(), ESP.getMinFreeHeap()); - if (!opfParser.setup()) { Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis()); return false; @@ -321,10 +318,9 @@ bool Epub::generateCoverBmp() const { } uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const { - const ZipFile zip("/sd" + filepath); const std::string path = FsHelpers::normalisePath(itemHref); - const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte); + const auto content = ZipFile(filepath).readFileToMemory(path.c_str(), size, trailingNullByte); if (!content) { Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str()); return nullptr; @@ -334,20 +330,13 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size } bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const { - const ZipFile 
zip("/sd" + filepath); const std::string path = FsHelpers::normalisePath(itemHref); - - return zip.readFileToStream(path.c_str(), out, chunkSize); + return ZipFile(filepath).readFileToStream(path.c_str(), out, chunkSize); } bool Epub::getItemSize(const std::string& itemHref, size_t* size) const { - const ZipFile zip("/sd" + filepath); - return getItemSize(zip, itemHref, size); -} - -bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size) { const std::string path = FsHelpers::normalisePath(itemHref); - return zip.getInflatedFileSize(path.c_str(), size); + return ZipFile(filepath).getInflatedFileSize(path.c_str(), size); } int Epub::getSpineItemsCount() const { diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index acdd32c8..c7850081 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -24,7 +24,6 @@ class Epub { bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; - static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size); public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -54,5 +53,5 @@ class Epub { size_t getCumulativeSpineItemSize(int spineIndex) const; size_t getBookSize() const; - uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead) const; + uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const; }; diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 3cef851a..8fcee282 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -122,7 +122,26 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // LUTs complete // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin - const ZipFile zip("/sd" + epubPath); + ZipFile 
zip(epubPath); + // Pre-open zip file to speed up size calculations + if (!zip.open()) { + Serial.printf("[%lu] [BMC] Could not open EPUB zip for size calculations\n", millis()); + bookFile.close(); + spineFile.close(); + tocFile.close(); + return false; + } + // TODO: For large ZIPs loading all the localHeaderOffsets will crash. + // However, not having them loaded is extremely slow. Need a better solution here. + // Perhaps only a cache of spine items or a better way to speed up lookups? + if (!zip.loadAllFileStatSlims()) { + Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis()); + bookFile.close(); + spineFile.close(); + tocFile.close(); + zip.close(); + return false; + } size_t cumSize = 0; spineFile.seek(0); for (int i = 0; i < spineCount; i++) { @@ -157,6 +176,8 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta // Write out spine data to book.bin writeSpineEntry(bookFile, spineEntry); } + // Close opened zip file + zip.close(); // Loop through toc entries from toc file writing to book.bin tocFile.seek(0); @@ -223,6 +244,8 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri int spineIndex = -1; // find spine index + // TODO: This lookup is slow as it needs to scan through all items each time. We can't hold it all in memory due to size. + // But perhaps we can load just the hrefs in a vector/list to do an index lookup? 
spineFile.seek(0); for (int i = 0; i < spineCount; i++) { auto spineEntry = readSpineEntry(spineFile); diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index 3cc64014..a62b2d0a 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include "../BookMetadataCache.h" @@ -183,6 +182,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name if (strcmp(atts[i], "idref") == 0) { const std::string idref = atts[i + 1]; // Resolve the idref to href using items map + // TODO: This lookup is slow as it needs to scan through all items each time. + // It can take up to 200ms per item when getting to 1500 items. self->tempItemStore.seek(0); std::string itemId; std::string href; diff --git a/lib/FsHelpers/FsHelpers.cpp b/lib/FsHelpers/FsHelpers.cpp index 06f3dfe6..c8b59ce8 100644 --- a/lib/FsHelpers/FsHelpers.cpp +++ b/lib/FsHelpers/FsHelpers.cpp @@ -6,6 +6,7 @@ bool FsHelpers::openFileForRead(const char* moduleName, const char* path, File& file) { if (!SD.exists(path)) { + Serial.printf("[%lu] [%s] File does not exist: %s\n", millis(), moduleName, path); return false; } diff --git a/lib/ZipFile/ZipFile.cpp b/lib/ZipFile/ZipFile.cpp index 83b11848..23cf0e8d 100644 --- a/lib/ZipFile/ZipFile.cpp +++ b/lib/ZipFile/ZipFile.cpp @@ -1,5 +1,6 @@ #include "ZipFile.h" +#include #include #include @@ -27,45 +28,135 @@ bool inflateOneShot(const uint8_t* inputBuf, const size_t deflatedSize, uint8_t* return true; } -ZipFile::ZipFile(std::string filePath) : filePath(std::move(filePath)) { - const bool status = mz_zip_reader_init_file(&zipArchive, this->filePath.c_str(), 0); - - if (!status) { - Serial.printf("[%lu] [ZIP] mz_zip_reader_init_file() failed for %s! 
Error: %s\n", millis(), this->filePath.c_str(), - mz_zip_get_error_string(zipArchive.m_last_error)); - } -} - -bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat) const { - // find the file - mz_uint32 fileIndex = 0; - if (!mz_zip_reader_locate_file_v2(&zipArchive, filename, nullptr, 0, &fileIndex)) { - Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis(), filename); +bool ZipFile::loadAllFileStatSlims() { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { return false; } - if (!mz_zip_reader_file_stat(&zipArchive, fileIndex, fileStat)) { - Serial.printf("[%lu] [ZIP] mz_zip_reader_file_stat() failed! Error: %s\n", millis(), - mz_zip_get_error_string(zipArchive.m_last_error)); + if (!loadZipDetails()) { + if (!wasOpen) { + close(); + } return false; } + + file.seek(zipDetails.centralDirOffset); + + uint32_t sig; + char itemName[256]; + fileStatSlimCache.clear(); + fileStatSlimCache.reserve(zipDetails.totalEntries); + + while (file.available()) { + file.read(reinterpret_cast(&sig), 4); + if (sig != 0x02014b50) break; // End of list + + FileStatSlim fileStat = {}; + + file.seek(6, SeekCur); + file.read(reinterpret_cast(&fileStat.method), 2); + file.seek(8, SeekCur); + file.read(reinterpret_cast(&fileStat.compressedSize), 4); + file.read(reinterpret_cast(&fileStat.uncompressedSize), 4); + uint16_t nameLen, m, k; + file.read(reinterpret_cast(&nameLen), 2); + file.read(reinterpret_cast(&m), 2); + file.read(reinterpret_cast(&k), 2); + file.seek(8, SeekCur); + file.read(reinterpret_cast(&fileStat.localHeaderOffset), 4); + file.read(reinterpret_cast(itemName), nameLen); + itemName[nameLen] = '\0'; + + fileStatSlimCache.emplace(itemName, fileStat); + + // Skip the rest of this entry (extra field + comment) + file.seek(m + k, SeekCur); + } + + if (!wasOpen) { + close(); + } return true; } -long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const { +bool ZipFile::loadFileStatSlim(const char* filename, 
FileStatSlim* fileStat) { + if (!fileStatSlimCache.empty()) { + const auto it = fileStatSlimCache.find(filename); + if (it != fileStatSlimCache.end()) { + *fileStat = it->second; + return true; + } + return false; + } + + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return false; + } + + if (!loadZipDetails()) { + if (!wasOpen) { + close(); + } + return false; + } + + file.seek(zipDetails.centralDirOffset); + + uint32_t sig; + char itemName[256]; + bool found = false; + + while (file.available()) { + file.read(reinterpret_cast(&sig), 4); + if (sig != 0x02014b50) break; // End of list + + file.seek(6, SeekCur); + file.read(reinterpret_cast(&fileStat->method), 2); + file.seek(8, SeekCur); + file.read(reinterpret_cast(&fileStat->compressedSize), 4); + file.read(reinterpret_cast(&fileStat->uncompressedSize), 4); + uint16_t nameLen, m, k; + file.read(reinterpret_cast(&nameLen), 2); + file.read(reinterpret_cast(&m), 2); + file.read(reinterpret_cast(&k), 2); + file.seek(8, SeekCur); + file.read(reinterpret_cast(&fileStat->localHeaderOffset), 4); + file.read(reinterpret_cast(itemName), nameLen); + itemName[nameLen] = '\0'; + + if (strcmp(itemName, filename) == 0) { + found = true; + break; + } + + // Skip the rest of this entry (extra field + comment) + file.seek(m + k, SeekCur); + } + + if (!wasOpen) { + close(); + } + return found; +} + +long ZipFile::getDataOffset(const FileStatSlim& fileStat) { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return -1; + } + constexpr auto localHeaderSize = 30; uint8_t pLocalHeader[localHeaderSize]; - const uint64_t fileOffset = fileStat.m_local_header_ofs; + const uint64_t fileOffset = fileStat.localHeaderOffset; - FILE* file = fopen(filePath.c_str(), "r"); - if (!file) { - Serial.printf("[%lu] [ZIP] Failed to open file for reading local header\n", millis()); - return -1; + file.seek(fileOffset); + const size_t read = file.read(pLocalHeader, localHeaderSize); + if (!wasOpen) { + close(); } - 
fseek(file, fileOffset, SEEK_SET); - const size_t read = fread(pLocalHeader, 1, localHeaderSize, file); - fclose(file); if (read != localHeaderSize) { Serial.printf("[%lu] [ZIP] Something went wrong reading the local header\n", millis()); @@ -83,48 +174,140 @@ long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const { return fileOffset + localHeaderSize + filenameLength + extraOffset; } -bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) const { - mz_zip_archive_file_stat fileStat; - if (!loadFileStat(filename, &fileStat)) { +bool ZipFile::loadZipDetails() { + if (zipDetails.isSet) { + return true; + } + + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { return false; } - *size = static_cast(fileStat.m_uncomp_size); + const size_t fileSize = file.size(); + if (fileSize < 22) { + Serial.printf("[%lu] [ZIP] File too small to be a valid zip\n", millis()); + if (!wasOpen) { + close(); + } + return false; // Minimum EOCD size is 22 bytes + } + + // We scan the last 1KB (or the whole file if smaller) for the EOCD signature + // 0x06054b50 is stored as 0x50, 0x4b, 0x05, 0x06 in little-endian + const int scanRange = fileSize > 1024 ? 
1024 : fileSize; + const auto buffer = static_cast(malloc(scanRange)); + if (!buffer) { + Serial.printf("[%lu] [ZIP] Failed to allocate memory for EOCD scan buffer\n", millis()); + if (!wasOpen) { + close(); + } + return false; + } + + file.seek(fileSize - scanRange); + file.read(buffer, scanRange); + + // Scan backwards for the signature + int foundOffset = -1; + for (int i = scanRange - 22; i >= 0; i--) { + constexpr uint32_t signature = 0x06054b50; + if (*reinterpret_cast(&buffer[i]) == signature) { + foundOffset = i; + break; + } + } + + if (foundOffset == -1) { + Serial.printf("[%lu] [ZIP] EOCD signature not found in zip file\n", millis()); + free(buffer); + if (!wasOpen) { + close(); + } + return false; + } + + // Now extract the values we need from the EOCD record + // Relative positions within EOCD: + // Offset 10: Total number of entries (2 bytes) + // Offset 16: Offset of start of central directory with respect to the starting disk number (4 bytes) + zipDetails.totalEntries = *reinterpret_cast(&buffer[foundOffset + 10]); + zipDetails.centralDirOffset = *reinterpret_cast(&buffer[foundOffset + 16]); + zipDetails.isSet = true; + + free(buffer); + if (!wasOpen) { + close(); + } return true; } -uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) const { - mz_zip_archive_file_stat fileStat; - if (!loadFileStat(filename, &fileStat)) { +bool ZipFile::open() { + if (!FsHelpers::openFileForRead("ZIP", filePath, file)) { + return false; + } + return true; +} + +bool ZipFile::close() { + if (file) { + file.close(); + } + return true; +} + +bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) { + FileStatSlim fileStat = {}; + if (!loadFileStatSlim(filename, &fileStat)) { + return false; + } + + *size = static_cast(fileStat.uncompressedSize); + return true; +} + +uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) { + const bool wasOpen = isOpen(); + if 
(!wasOpen && !open()) { + return nullptr; + } + + FileStatSlim fileStat = {}; + if (!loadFileStatSlim(filename, &fileStat)) { + if (!wasOpen) { + close(); + } return nullptr; } const long fileOffset = getDataOffset(fileStat); if (fileOffset < 0) { + if (!wasOpen) { + close(); + } return nullptr; } - FILE* file = fopen(filePath.c_str(), "rb"); - if (!file) { - Serial.printf("[%lu] [ZIP] Failed to open file for reading\n", millis()); - return nullptr; - } - fseek(file, fileOffset, SEEK_SET); + file.seek(fileOffset); - const auto deflatedDataSize = static_cast(fileStat.m_comp_size); - const auto inflatedDataSize = static_cast(fileStat.m_uncomp_size); + const auto deflatedDataSize = fileStat.compressedSize; + const auto inflatedDataSize = fileStat.uncompressedSize; const auto dataSize = trailingNullByte ? inflatedDataSize + 1 : inflatedDataSize; const auto data = static_cast(malloc(dataSize)); if (data == nullptr) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for output buffer (%zu bytes)\n", millis(), dataSize); - fclose(file); + if (!wasOpen) { + close(); + } return nullptr; } - if (fileStat.m_method == MZ_NO_COMPRESSION) { + if (fileStat.method == MZ_NO_COMPRESSION) { // no deflation, just read content - const size_t dataRead = fread(data, 1, inflatedDataSize, file); - fclose(file); + const size_t dataRead = file.read(data, inflatedDataSize); + if (!wasOpen) { + close(); + } if (dataRead != inflatedDataSize) { Serial.printf("[%lu] [ZIP] Failed to read data\n", millis()); @@ -133,17 +316,21 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo } // Continue out of block with data set - } else if (fileStat.m_method == MZ_DEFLATED) { + } else if (fileStat.method == MZ_DEFLATED) { // Read out deflated content from file const auto deflatedData = static_cast(malloc(deflatedDataSize)); if (deflatedData == nullptr) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for decompression buffer\n", millis()); - fclose(file); + if 
(!wasOpen) { + close(); + } return nullptr; } - const size_t dataRead = fread(deflatedData, 1, deflatedDataSize, file); - fclose(file); + const size_t dataRead = file.read(deflatedData, deflatedDataSize); + if (!wasOpen) { + close(); + } if (dataRead != deflatedDataSize) { Serial.printf("[%lu] [ZIP] Failed to read data, expected %d got %d\n", millis(), deflatedDataSize, dataRead); @@ -152,7 +339,7 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo return nullptr; } - bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize); + const bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize); free(deflatedData); if (!success) { @@ -164,7 +351,9 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo // Continue out of block with data set } else { Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } return nullptr; } @@ -173,9 +362,14 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo return data; } -bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t chunkSize) const { - mz_zip_archive_file_stat fileStat; - if (!loadFileStat(filename, &fileStat)) { +bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t chunkSize) { + const bool wasOpen = isOpen(); + if (!wasOpen && !open()) { + return false; + } + + FileStatSlim fileStat = {}; + if (!loadFileStatSlim(filename, &fileStat)) { return false; } @@ -184,32 +378,30 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch return false; } - FILE* file = fopen(filePath.c_str(), "rb"); - if (!file) { - Serial.printf("[%lu] [ZIP] Failed to open file for streaming\n", millis()); - return false; - } - fseek(file, fileOffset, SEEK_SET); + file.seek(fileOffset); + const auto deflatedDataSize = fileStat.compressedSize; + const auto 
inflatedDataSize = fileStat.uncompressedSize; - const auto deflatedDataSize = static_cast(fileStat.m_comp_size); - const auto inflatedDataSize = static_cast(fileStat.m_uncomp_size); - - if (fileStat.m_method == MZ_NO_COMPRESSION) { + if (fileStat.method == MZ_NO_COMPRESSION) { // no deflation, just read content const auto buffer = static_cast(malloc(chunkSize)); if (!buffer) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for buffer\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } return false; } size_t remaining = inflatedDataSize; while (remaining > 0) { - const size_t dataRead = fread(buffer, 1, remaining < chunkSize ? remaining : chunkSize, file); + const size_t dataRead = file.read(buffer, remaining < chunkSize ? remaining : chunkSize); if (dataRead == 0) { Serial.printf("[%lu] [ZIP] Could not read more bytes\n", millis()); free(buffer); - fclose(file); + if (!wasOpen) { + close(); + } return false; } @@ -217,17 +409,21 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch remaining -= dataRead; } - fclose(file); + if (!wasOpen) { + close(); + } free(buffer); return true; } - if (fileStat.m_method == MZ_DEFLATED) { + if (fileStat.method == MZ_DEFLATED) { // Setup inflator const auto inflator = static_cast(malloc(sizeof(tinfl_decompressor))); if (!inflator) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for inflator\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } return false; } memset(inflator, 0, sizeof(tinfl_decompressor)); @@ -238,7 +434,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch if (!fileReadBuffer) { Serial.printf("[%lu] [ZIP] Failed to allocate memory for zip file read buffer\n", millis()); free(inflator); - fclose(file); + if (!wasOpen) { + close(); + } return false; } @@ -247,7 +445,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch Serial.printf("[%lu] [ZIP] Failed to allocate memory for 
dictionary\n", millis()); free(inflator); free(fileReadBuffer); - fclose(file); + if (!wasOpen) { + close(); + } return false; } memset(outputBuffer, 0, TINFL_LZ_DICT_SIZE); @@ -267,7 +467,7 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch } fileReadBufferFilledBytes = - fread(fileReadBuffer, 1, fileRemainingBytes < chunkSize ? fileRemainingBytes : chunkSize, file); + file.read(fileReadBuffer, fileRemainingBytes < chunkSize ? fileRemainingBytes : chunkSize); fileRemainingBytes -= fileReadBufferFilledBytes; fileReadBufferCursor = 0; @@ -294,7 +494,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch processedOutputBytes += outBytes; if (out.write(outputBuffer + outputCursor, outBytes) != outBytes) { Serial.printf("[%lu] [ZIP] Failed to write all output bytes to stream\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } free(outputBuffer); free(fileReadBuffer); free(inflator); @@ -306,7 +508,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch if (status < 0) { Serial.printf("[%lu] [ZIP] tinfl_decompress() failed with status %d\n", millis(), status); - fclose(file); + if (!wasOpen) { + close(); + } free(outputBuffer); free(fileReadBuffer); free(inflator); @@ -316,7 +520,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch if (status == TINFL_STATUS_DONE) { Serial.printf("[%lu] [ZIP] Decompressed %d bytes into %d bytes\n", millis(), deflatedDataSize, inflatedDataSize); - fclose(file); + if (!wasOpen) { + close(); + } free(inflator); free(fileReadBuffer); free(outputBuffer); @@ -326,13 +532,19 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch // If we get here, EOF reached without TINFL_STATUS_DONE Serial.printf("[%lu] [ZIP] Unexpected EOF\n", millis()); - fclose(file); + if (!wasOpen) { + close(); + } free(outputBuffer); free(fileReadBuffer); free(inflator); return false; } + if (!wasOpen) { 
+ close(); + } + Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis()); return false; } diff --git a/lib/ZipFile/ZipFile.h b/lib/ZipFile/ZipFile.h index 58e3ab91..4758f161 100644 --- a/lib/ZipFile/ZipFile.h +++ b/lib/ZipFile/ZipFile.h @@ -1,20 +1,46 @@ #pragma once -#include +#include #include - -#include "miniz.h" +#include class ZipFile { - std::string filePath; - mutable mz_zip_archive zipArchive = {}; - bool loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat) const; - long getDataOffset(const mz_zip_archive_file_stat& fileStat) const; + public: + struct FileStatSlim { + uint16_t method; // Compression method + uint32_t compressedSize; // Compressed size + uint32_t uncompressedSize; // Uncompressed size + uint32_t localHeaderOffset; // Offset of local file header + }; + + struct ZipDetails { + uint32_t centralDirOffset; + uint16_t totalEntries; + bool isSet; + }; + + private: + const std::string& filePath; + File file; + ZipDetails zipDetails = {0, 0, false}; + std::unordered_map fileStatSlimCache; + + bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat); + long getDataOffset(const FileStatSlim& fileStat); + bool loadZipDetails(); public: - explicit ZipFile(std::string filePath); - ~ZipFile() { mz_zip_reader_end(&zipArchive); } - bool getInflatedFileSize(const char* filename, size_t* size) const; - uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false) const; - bool readFileToStream(const char* filename, Print& out, size_t chunkSize) const; + explicit ZipFile(const std::string& filePath) : filePath(filePath) {} + ~ZipFile() = default; + // Zip file can be opened and closed by hand in order to allow for quick calculation of inflated file size + // It is NOT recommended to pre-open it for any kind of inflation due to memory constraints + bool isOpen() const { return !!file; } + bool open(); + bool close(); + bool loadAllFileStatSlims(); + bool 
getInflatedFileSize(const char* filename, size_t* size); + // Due to the memory required to run each of these, it is recommended not to pre-open the zip file for multiple calls. + // These functions will open and close the zip as needed. + uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false); + bool readFileToStream(const char* filename, Print& out, size_t chunkSize); };