Custom zip parsing (#140)

## Summary

* Use custom zip central directory parsing to lower memory usage when
loading zipped epub content
This commit is contained in:
Dave Allie 2025-12-29 20:17:29 +10:00 committed by GitHub
parent d7f4bd54f5
commit 071ccb9d1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 358 additions and 107 deletions

View File

@ -60,9 +60,6 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
} }
ContentOpfParser opfParser(getCachePath(), getBasePath(), contentOpfSize, bookMetadataCache.get()); ContentOpfParser opfParser(getCachePath(), getBasePath(), contentOpfSize, bookMetadataCache.get());
Serial.printf("[%lu] [MEM] Free: %d bytes, Total: %d bytes, Min Free: %d bytes\n", millis(), ESP.getFreeHeap(),
ESP.getHeapSize(), ESP.getMinFreeHeap());
if (!opfParser.setup()) { if (!opfParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis()); Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
return false; return false;
@ -321,10 +318,9 @@ bool Epub::generateCoverBmp() const {
} }
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const { uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const {
const ZipFile zip("/sd" + filepath);
const std::string path = FsHelpers::normalisePath(itemHref); const std::string path = FsHelpers::normalisePath(itemHref);
const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte); const auto content = ZipFile(filepath).readFileToMemory(path.c_str(), size, trailingNullByte);
if (!content) { if (!content) {
Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str()); Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str());
return nullptr; return nullptr;
@ -334,20 +330,13 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size
} }
bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const { bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
const ZipFile zip("/sd" + filepath);
const std::string path = FsHelpers::normalisePath(itemHref); const std::string path = FsHelpers::normalisePath(itemHref);
return ZipFile(filepath).readFileToStream(path.c_str(), out, chunkSize);
return zip.readFileToStream(path.c_str(), out, chunkSize);
} }
bool Epub::getItemSize(const std::string& itemHref, size_t* size) const { bool Epub::getItemSize(const std::string& itemHref, size_t* size) const {
const ZipFile zip("/sd" + filepath);
return getItemSize(zip, itemHref, size);
}
bool Epub::getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size) {
const std::string path = FsHelpers::normalisePath(itemHref); const std::string path = FsHelpers::normalisePath(itemHref);
return zip.getInflatedFileSize(path.c_str(), size); return ZipFile(filepath).getInflatedFileSize(path.c_str(), size);
} }
int Epub::getSpineItemsCount() const { int Epub::getSpineItemsCount() const {

View File

@ -24,7 +24,6 @@ class Epub {
bool findContentOpfFile(std::string* contentOpfFile) const; bool findContentOpfFile(std::string* contentOpfFile) const;
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
bool parseTocNcxFile() const; bool parseTocNcxFile() const;
static bool getItemSize(const ZipFile& zip, const std::string& itemHref, size_t* size);
public: public:
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
@ -54,5 +53,5 @@ class Epub {
size_t getCumulativeSpineItemSize(int spineIndex) const; size_t getCumulativeSpineItemSize(int spineIndex) const;
size_t getBookSize() const; size_t getBookSize() const;
uint8_t calculateProgress(const int currentSpineIndex, const float currentSpineRead) const; uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const;
}; };

View File

@ -122,7 +122,26 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
// LUTs complete // LUTs complete
// Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin // Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
const ZipFile zip("/sd" + epubPath); ZipFile zip(epubPath);
// Pre-open zip file to speed up size calculations
if (!zip.open()) {
Serial.printf("[%lu] [BMC] Could not open EPUB zip for size calculations\n", millis());
bookFile.close();
spineFile.close();
tocFile.close();
return false;
}
// TODO: For large ZIPs, loading all the localHeaderOffsets will crash.
// However, not having them loaded is extremely slow. Need a better solution here.
// Perhaps only a cache of spine items or a better way to speed up lookups?
if (!zip.loadAllFileStatSlims()) {
Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis());
bookFile.close();
spineFile.close();
tocFile.close();
zip.close();
return false;
}
size_t cumSize = 0; size_t cumSize = 0;
spineFile.seek(0); spineFile.seek(0);
for (int i = 0; i < spineCount; i++) { for (int i = 0; i < spineCount; i++) {
@ -157,6 +176,8 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
// Write out spine data to book.bin // Write out spine data to book.bin
writeSpineEntry(bookFile, spineEntry); writeSpineEntry(bookFile, spineEntry);
} }
// Close opened zip file
zip.close();
// Loop through toc entries from toc file writing to book.bin // Loop through toc entries from toc file writing to book.bin
tocFile.seek(0); tocFile.seek(0);
@ -223,6 +244,8 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri
int spineIndex = -1; int spineIndex = -1;
// find spine index // find spine index
// TODO: This lookup is slow as we need to scan through all items each time. We can't hold it all in memory due to size.
// But perhaps we can load just the hrefs in a vector/list to do an index lookup?
spineFile.seek(0); spineFile.seek(0);
for (int i = 0; i < spineCount; i++) { for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile); auto spineEntry = readSpineEntry(spineFile);

View File

@ -3,7 +3,6 @@
#include <FsHelpers.h> #include <FsHelpers.h>
#include <HardwareSerial.h> #include <HardwareSerial.h>
#include <Serialization.h> #include <Serialization.h>
#include <ZipFile.h>
#include "../BookMetadataCache.h" #include "../BookMetadataCache.h"
@ -183,6 +182,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
if (strcmp(atts[i], "idref") == 0) { if (strcmp(atts[i], "idref") == 0) {
const std::string idref = atts[i + 1]; const std::string idref = atts[i + 1];
// Resolve the idref to href using items map // Resolve the idref to href using items map
// TODO: This lookup is slow as we need to scan through all items each time.
// It can take up to 200ms per item when getting to 1500 items.
self->tempItemStore.seek(0); self->tempItemStore.seek(0);
std::string itemId; std::string itemId;
std::string href; std::string href;

View File

@ -6,6 +6,7 @@
bool FsHelpers::openFileForRead(const char* moduleName, const char* path, File& file) { bool FsHelpers::openFileForRead(const char* moduleName, const char* path, File& file) {
if (!SD.exists(path)) { if (!SD.exists(path)) {
Serial.printf("[%lu] [%s] File does not exist: %s\n", millis(), moduleName, path);
return false; return false;
} }

View File

@ -1,5 +1,6 @@
#include "ZipFile.h" #include "ZipFile.h"
#include <FsHelpers.h>
#include <HardwareSerial.h> #include <HardwareSerial.h>
#include <miniz.h> #include <miniz.h>
@ -27,45 +28,135 @@ bool inflateOneShot(const uint8_t* inputBuf, const size_t deflatedSize, uint8_t*
return true; return true;
} }
ZipFile::ZipFile(std::string filePath) : filePath(std::move(filePath)) { bool ZipFile::loadAllFileStatSlims() {
const bool status = mz_zip_reader_init_file(&zipArchive, this->filePath.c_str(), 0); const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
if (!status) {
Serial.printf("[%lu] [ZIP] mz_zip_reader_init_file() failed for %s! Error: %s\n", millis(), this->filePath.c_str(),
mz_zip_get_error_string(zipArchive.m_last_error));
}
}
bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat) const {
// find the file
mz_uint32 fileIndex = 0;
if (!mz_zip_reader_locate_file_v2(&zipArchive, filename, nullptr, 0, &fileIndex)) {
Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis(), filename);
return false; return false;
} }
if (!mz_zip_reader_file_stat(&zipArchive, fileIndex, fileStat)) { if (!loadZipDetails()) {
Serial.printf("[%lu] [ZIP] mz_zip_reader_file_stat() failed! Error: %s\n", millis(), if (!wasOpen) {
mz_zip_get_error_string(zipArchive.m_last_error)); close();
}
return false; return false;
} }
file.seek(zipDetails.centralDirOffset);
uint32_t sig;
char itemName[256];
fileStatSlimCache.clear();
fileStatSlimCache.reserve(zipDetails.totalEntries);
while (file.available()) {
file.read(reinterpret_cast<uint8_t*>(&sig), 4);
if (sig != 0x02014b50) break; // End of list
FileStatSlim fileStat = {};
file.seek(6, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&fileStat.method), 2);
file.seek(8, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&fileStat.compressedSize), 4);
file.read(reinterpret_cast<uint8_t*>(&fileStat.uncompressedSize), 4);
uint16_t nameLen, m, k;
file.read(reinterpret_cast<uint8_t*>(&nameLen), 2);
file.read(reinterpret_cast<uint8_t*>(&m), 2);
file.read(reinterpret_cast<uint8_t*>(&k), 2);
file.seek(8, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&fileStat.localHeaderOffset), 4);
file.read(reinterpret_cast<uint8_t*>(itemName), nameLen);
itemName[nameLen] = '\0';
fileStatSlimCache.emplace(itemName, fileStat);
// Skip the rest of this entry (extra field + comment)
file.seek(m + k, SeekCur);
}
if (!wasOpen) {
close();
}
return true; return true;
} }
long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const { bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) {
if (!fileStatSlimCache.empty()) {
const auto it = fileStatSlimCache.find(filename);
if (it != fileStatSlimCache.end()) {
*fileStat = it->second;
return true;
}
return false;
}
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return false;
}
if (!loadZipDetails()) {
if (!wasOpen) {
close();
}
return false;
}
file.seek(zipDetails.centralDirOffset);
uint32_t sig;
char itemName[256];
bool found = false;
while (file.available()) {
file.read(reinterpret_cast<uint8_t*>(&sig), 4);
if (sig != 0x02014b50) break; // End of list
file.seek(6, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&fileStat->method), 2);
file.seek(8, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&fileStat->compressedSize), 4);
file.read(reinterpret_cast<uint8_t*>(&fileStat->uncompressedSize), 4);
uint16_t nameLen, m, k;
file.read(reinterpret_cast<uint8_t*>(&nameLen), 2);
file.read(reinterpret_cast<uint8_t*>(&m), 2);
file.read(reinterpret_cast<uint8_t*>(&k), 2);
file.seek(8, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&fileStat->localHeaderOffset), 4);
file.read(reinterpret_cast<uint8_t*>(itemName), nameLen);
itemName[nameLen] = '\0';
if (strcmp(itemName, filename) == 0) {
found = true;
break;
}
// Skip the rest of this entry (extra field + comment)
file.seek(m + k, SeekCur);
}
if (!wasOpen) {
close();
}
return found;
}
long ZipFile::getDataOffset(const FileStatSlim& fileStat) {
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return -1;
}
constexpr auto localHeaderSize = 30; constexpr auto localHeaderSize = 30;
uint8_t pLocalHeader[localHeaderSize]; uint8_t pLocalHeader[localHeaderSize];
const uint64_t fileOffset = fileStat.m_local_header_ofs; const uint64_t fileOffset = fileStat.localHeaderOffset;
FILE* file = fopen(filePath.c_str(), "r"); file.seek(fileOffset);
if (!file) { const size_t read = file.read(pLocalHeader, localHeaderSize);
Serial.printf("[%lu] [ZIP] Failed to open file for reading local header\n", millis()); if (!wasOpen) {
return -1; close();
} }
fseek(file, fileOffset, SEEK_SET);
const size_t read = fread(pLocalHeader, 1, localHeaderSize, file);
fclose(file);
if (read != localHeaderSize) { if (read != localHeaderSize) {
Serial.printf("[%lu] [ZIP] Something went wrong reading the local header\n", millis()); Serial.printf("[%lu] [ZIP] Something went wrong reading the local header\n", millis());
@ -83,48 +174,140 @@ long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const {
return fileOffset + localHeaderSize + filenameLength + extraOffset; return fileOffset + localHeaderSize + filenameLength + extraOffset;
} }
bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) const { bool ZipFile::loadZipDetails() {
mz_zip_archive_file_stat fileStat; if (zipDetails.isSet) {
if (!loadFileStat(filename, &fileStat)) {
return false;
}
*size = static_cast<size_t>(fileStat.m_uncomp_size);
return true; return true;
} }
uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) const { const bool wasOpen = isOpen();
mz_zip_archive_file_stat fileStat; if (!wasOpen && !open()) {
if (!loadFileStat(filename, &fileStat)) { return false;
}
const size_t fileSize = file.size();
if (fileSize < 22) {
Serial.printf("[%lu] [ZIP] File too small to be a valid zip\n", millis());
if (!wasOpen) {
close();
}
return false; // Minimum EOCD size is 22 bytes
}
// We scan the last 1KB (or the whole file if smaller) for the EOCD signature
// 0x06054b50 is stored as 0x50, 0x4b, 0x05, 0x06 in little-endian
const int scanRange = fileSize > 1024 ? 1024 : fileSize;
const auto buffer = static_cast<uint8_t*>(malloc(scanRange));
if (!buffer) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for EOCD scan buffer\n", millis());
if (!wasOpen) {
close();
}
return false;
}
file.seek(fileSize - scanRange);
file.read(buffer, scanRange);
// Scan backwards for the signature
int foundOffset = -1;
for (int i = scanRange - 22; i >= 0; i--) {
constexpr uint32_t signature = 0x06054b50;
if (*reinterpret_cast<uint32_t*>(&buffer[i]) == signature) {
foundOffset = i;
break;
}
}
if (foundOffset == -1) {
Serial.printf("[%lu] [ZIP] EOCD signature not found in zip file\n", millis());
free(buffer);
if (!wasOpen) {
close();
}
return false;
}
// Now extract the values we need from the EOCD record
// Relative positions within EOCD:
// Offset 10: Total number of entries (2 bytes)
// Offset 16: Offset of start of central directory with respect to the starting disk number (4 bytes)
zipDetails.totalEntries = *reinterpret_cast<uint16_t*>(&buffer[foundOffset + 10]);
zipDetails.centralDirOffset = *reinterpret_cast<uint32_t*>(&buffer[foundOffset + 16]);
zipDetails.isSet = true;
free(buffer);
if (!wasOpen) {
close();
}
return true;
}
// Opens the zip at `filePath` for reading into the `file` member.
// Returns the result reported by FsHelpers::openFileForRead (true on success).
bool ZipFile::open() {
  const bool opened = FsHelpers::openFileForRead("ZIP", filePath, file);
  return opened;
}
// Closes the underlying file handle if one is currently open.
// Always returns true, whether or not a handle had to be closed.
bool ZipFile::close() {
  if (!file) {
    return true;  // Nothing open, nothing to do
  }

  file.close();
  return true;
}
bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) {
FileStatSlim fileStat = {};
if (!loadFileStatSlim(filename, &fileStat)) {
return false;
}
*size = static_cast<size_t>(fileStat.uncompressedSize);
return true;
}
uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) {
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return nullptr;
}
FileStatSlim fileStat = {};
if (!loadFileStatSlim(filename, &fileStat)) {
if (!wasOpen) {
close();
}
return nullptr; return nullptr;
} }
const long fileOffset = getDataOffset(fileStat); const long fileOffset = getDataOffset(fileStat);
if (fileOffset < 0) { if (fileOffset < 0) {
if (!wasOpen) {
close();
}
return nullptr; return nullptr;
} }
FILE* file = fopen(filePath.c_str(), "rb"); file.seek(fileOffset);
if (!file) {
Serial.printf("[%lu] [ZIP] Failed to open file for reading\n", millis());
return nullptr;
}
fseek(file, fileOffset, SEEK_SET);
const auto deflatedDataSize = static_cast<size_t>(fileStat.m_comp_size); const auto deflatedDataSize = fileStat.compressedSize;
const auto inflatedDataSize = static_cast<size_t>(fileStat.m_uncomp_size); const auto inflatedDataSize = fileStat.uncompressedSize;
const auto dataSize = trailingNullByte ? inflatedDataSize + 1 : inflatedDataSize; const auto dataSize = trailingNullByte ? inflatedDataSize + 1 : inflatedDataSize;
const auto data = static_cast<uint8_t*>(malloc(dataSize)); const auto data = static_cast<uint8_t*>(malloc(dataSize));
if (data == nullptr) { if (data == nullptr) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for output buffer (%zu bytes)\n", millis(), dataSize); Serial.printf("[%lu] [ZIP] Failed to allocate memory for output buffer (%zu bytes)\n", millis(), dataSize);
fclose(file); if (!wasOpen) {
close();
}
return nullptr; return nullptr;
} }
if (fileStat.m_method == MZ_NO_COMPRESSION) { if (fileStat.method == MZ_NO_COMPRESSION) {
// no deflation, just read content // no deflation, just read content
const size_t dataRead = fread(data, 1, inflatedDataSize, file); const size_t dataRead = file.read(data, inflatedDataSize);
fclose(file); if (!wasOpen) {
close();
}
if (dataRead != inflatedDataSize) { if (dataRead != inflatedDataSize) {
Serial.printf("[%lu] [ZIP] Failed to read data\n", millis()); Serial.printf("[%lu] [ZIP] Failed to read data\n", millis());
@ -133,17 +316,21 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
} }
// Continue out of block with data set // Continue out of block with data set
} else if (fileStat.m_method == MZ_DEFLATED) { } else if (fileStat.method == MZ_DEFLATED) {
// Read out deflated content from file // Read out deflated content from file
const auto deflatedData = static_cast<uint8_t*>(malloc(deflatedDataSize)); const auto deflatedData = static_cast<uint8_t*>(malloc(deflatedDataSize));
if (deflatedData == nullptr) { if (deflatedData == nullptr) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for decompression buffer\n", millis()); Serial.printf("[%lu] [ZIP] Failed to allocate memory for decompression buffer\n", millis());
fclose(file); if (!wasOpen) {
close();
}
return nullptr; return nullptr;
} }
const size_t dataRead = fread(deflatedData, 1, deflatedDataSize, file); const size_t dataRead = file.read(deflatedData, deflatedDataSize);
fclose(file); if (!wasOpen) {
close();
}
if (dataRead != deflatedDataSize) { if (dataRead != deflatedDataSize) {
Serial.printf("[%lu] [ZIP] Failed to read data, expected %d got %d\n", millis(), deflatedDataSize, dataRead); Serial.printf("[%lu] [ZIP] Failed to read data, expected %d got %d\n", millis(), deflatedDataSize, dataRead);
@ -152,7 +339,7 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
return nullptr; return nullptr;
} }
bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize); const bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize);
free(deflatedData); free(deflatedData);
if (!success) { if (!success) {
@ -164,7 +351,9 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
// Continue out of block with data set // Continue out of block with data set
} else { } else {
Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis()); Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis());
fclose(file); if (!wasOpen) {
close();
}
return nullptr; return nullptr;
} }
@ -173,9 +362,14 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
return data; return data;
} }
bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t chunkSize) const { bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t chunkSize) {
mz_zip_archive_file_stat fileStat; const bool wasOpen = isOpen();
if (!loadFileStat(filename, &fileStat)) { if (!wasOpen && !open()) {
return false;
}
FileStatSlim fileStat = {};
if (!loadFileStatSlim(filename, &fileStat)) {
return false; return false;
} }
@ -184,32 +378,30 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
return false; return false;
} }
FILE* file = fopen(filePath.c_str(), "rb"); file.seek(fileOffset);
if (!file) { const auto deflatedDataSize = fileStat.compressedSize;
Serial.printf("[%lu] [ZIP] Failed to open file for streaming\n", millis()); const auto inflatedDataSize = fileStat.uncompressedSize;
return false;
}
fseek(file, fileOffset, SEEK_SET);
const auto deflatedDataSize = static_cast<size_t>(fileStat.m_comp_size); if (fileStat.method == MZ_NO_COMPRESSION) {
const auto inflatedDataSize = static_cast<size_t>(fileStat.m_uncomp_size);
if (fileStat.m_method == MZ_NO_COMPRESSION) {
// no deflation, just read content // no deflation, just read content
const auto buffer = static_cast<uint8_t*>(malloc(chunkSize)); const auto buffer = static_cast<uint8_t*>(malloc(chunkSize));
if (!buffer) { if (!buffer) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for buffer\n", millis()); Serial.printf("[%lu] [ZIP] Failed to allocate memory for buffer\n", millis());
fclose(file); if (!wasOpen) {
close();
}
return false; return false;
} }
size_t remaining = inflatedDataSize; size_t remaining = inflatedDataSize;
while (remaining > 0) { while (remaining > 0) {
const size_t dataRead = fread(buffer, 1, remaining < chunkSize ? remaining : chunkSize, file); const size_t dataRead = file.read(buffer, remaining < chunkSize ? remaining : chunkSize);
if (dataRead == 0) { if (dataRead == 0) {
Serial.printf("[%lu] [ZIP] Could not read more bytes\n", millis()); Serial.printf("[%lu] [ZIP] Could not read more bytes\n", millis());
free(buffer); free(buffer);
fclose(file); if (!wasOpen) {
close();
}
return false; return false;
} }
@ -217,17 +409,21 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
remaining -= dataRead; remaining -= dataRead;
} }
fclose(file); if (!wasOpen) {
close();
}
free(buffer); free(buffer);
return true; return true;
} }
if (fileStat.m_method == MZ_DEFLATED) { if (fileStat.method == MZ_DEFLATED) {
// Setup inflator // Setup inflator
const auto inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor))); const auto inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
if (!inflator) { if (!inflator) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for inflator\n", millis()); Serial.printf("[%lu] [ZIP] Failed to allocate memory for inflator\n", millis());
fclose(file); if (!wasOpen) {
close();
}
return false; return false;
} }
memset(inflator, 0, sizeof(tinfl_decompressor)); memset(inflator, 0, sizeof(tinfl_decompressor));
@ -238,7 +434,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
if (!fileReadBuffer) { if (!fileReadBuffer) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for zip file read buffer\n", millis()); Serial.printf("[%lu] [ZIP] Failed to allocate memory for zip file read buffer\n", millis());
free(inflator); free(inflator);
fclose(file); if (!wasOpen) {
close();
}
return false; return false;
} }
@ -247,7 +445,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
Serial.printf("[%lu] [ZIP] Failed to allocate memory for dictionary\n", millis()); Serial.printf("[%lu] [ZIP] Failed to allocate memory for dictionary\n", millis());
free(inflator); free(inflator);
free(fileReadBuffer); free(fileReadBuffer);
fclose(file); if (!wasOpen) {
close();
}
return false; return false;
} }
memset(outputBuffer, 0, TINFL_LZ_DICT_SIZE); memset(outputBuffer, 0, TINFL_LZ_DICT_SIZE);
@ -267,7 +467,7 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
} }
fileReadBufferFilledBytes = fileReadBufferFilledBytes =
fread(fileReadBuffer, 1, fileRemainingBytes < chunkSize ? fileRemainingBytes : chunkSize, file); file.read(fileReadBuffer, fileRemainingBytes < chunkSize ? fileRemainingBytes : chunkSize);
fileRemainingBytes -= fileReadBufferFilledBytes; fileRemainingBytes -= fileReadBufferFilledBytes;
fileReadBufferCursor = 0; fileReadBufferCursor = 0;
@ -294,7 +494,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
processedOutputBytes += outBytes; processedOutputBytes += outBytes;
if (out.write(outputBuffer + outputCursor, outBytes) != outBytes) { if (out.write(outputBuffer + outputCursor, outBytes) != outBytes) {
Serial.printf("[%lu] [ZIP] Failed to write all output bytes to stream\n", millis()); Serial.printf("[%lu] [ZIP] Failed to write all output bytes to stream\n", millis());
fclose(file); if (!wasOpen) {
close();
}
free(outputBuffer); free(outputBuffer);
free(fileReadBuffer); free(fileReadBuffer);
free(inflator); free(inflator);
@ -306,7 +508,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
if (status < 0) { if (status < 0) {
Serial.printf("[%lu] [ZIP] tinfl_decompress() failed with status %d\n", millis(), status); Serial.printf("[%lu] [ZIP] tinfl_decompress() failed with status %d\n", millis(), status);
fclose(file); if (!wasOpen) {
close();
}
free(outputBuffer); free(outputBuffer);
free(fileReadBuffer); free(fileReadBuffer);
free(inflator); free(inflator);
@ -316,7 +520,9 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
if (status == TINFL_STATUS_DONE) { if (status == TINFL_STATUS_DONE) {
Serial.printf("[%lu] [ZIP] Decompressed %d bytes into %d bytes\n", millis(), deflatedDataSize, Serial.printf("[%lu] [ZIP] Decompressed %d bytes into %d bytes\n", millis(), deflatedDataSize,
inflatedDataSize); inflatedDataSize);
fclose(file); if (!wasOpen) {
close();
}
free(inflator); free(inflator);
free(fileReadBuffer); free(fileReadBuffer);
free(outputBuffer); free(outputBuffer);
@ -326,13 +532,19 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
// If we get here, EOF reached without TINFL_STATUS_DONE // If we get here, EOF reached without TINFL_STATUS_DONE
Serial.printf("[%lu] [ZIP] Unexpected EOF\n", millis()); Serial.printf("[%lu] [ZIP] Unexpected EOF\n", millis());
fclose(file); if (!wasOpen) {
close();
}
free(outputBuffer); free(outputBuffer);
free(fileReadBuffer); free(fileReadBuffer);
free(inflator); free(inflator);
return false; return false;
} }
if (!wasOpen) {
close();
}
Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis()); Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis());
return false; return false;
} }

View File

@ -1,20 +1,46 @@
#pragma once #pragma once
#include <Print.h> #include <FS.h>
#include <string> #include <string>
#include <unordered_map>
#include "miniz.h"
class ZipFile { class ZipFile {
std::string filePath; public:
mutable mz_zip_archive zipArchive = {}; struct FileStatSlim {
bool loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat) const; uint16_t method; // Compression method
long getDataOffset(const mz_zip_archive_file_stat& fileStat) const; uint32_t compressedSize; // Compressed size
uint32_t uncompressedSize; // Uncompressed size
uint32_t localHeaderOffset; // Offset of local file header
};
struct ZipDetails {
uint32_t centralDirOffset;
uint16_t totalEntries;
bool isSet;
};
private:
const std::string& filePath;
File file;
ZipDetails zipDetails = {0, 0, false};
std::unordered_map<std::string, FileStatSlim> fileStatSlimCache;
bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat);
long getDataOffset(const FileStatSlim& fileStat);
bool loadZipDetails();
public: public:
explicit ZipFile(std::string filePath); explicit ZipFile(const std::string& filePath) : filePath(filePath) {}
~ZipFile() { mz_zip_reader_end(&zipArchive); } ~ZipFile() = default;
bool getInflatedFileSize(const char* filename, size_t* size) const; // Zip file can be opened and closed by hand in order to allow for quick calculation of inflated file size
uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false) const; // It is NOT recommended to pre-open it for any kind of inflation due to memory constraints
bool readFileToStream(const char* filename, Print& out, size_t chunkSize) const; bool isOpen() const { return !!file; }
bool open();
bool close();
bool loadAllFileStatSlims();
bool getInflatedFileSize(const char* filename, size_t* size);
// Due to the memory required to run each of these, it is recommended not to pre-open the zip file across multiple calls.
// These functions will open and close the zip as needed.
uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false);
bool readFileToStream(const char* filename, Print& out, size_t chunkSize);
}; };