Use custom implementation of ZIP metadata parsing

This commit is contained in:
Dave Allie 2025-12-29 01:43:55 +11:00
parent d5f750ba65
commit 134409023b
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F
4 changed files with 311 additions and 76 deletions

View File

@ -320,7 +320,7 @@ bool Epub::generateCoverBmp() const {
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, const bool trailingNullByte) const {
const std::string path = FsHelpers::normalisePath(itemHref);
const auto content = ZipFile("/sd" + filepath).readFileToMemory(path.c_str(), size, trailingNullByte);
const auto content = ZipFile(filepath).readFileToMemory(path.c_str(), size, trailingNullByte);
if (!content) {
Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str());
return nullptr;
@ -331,12 +331,12 @@ uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size
bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
const std::string path = FsHelpers::normalisePath(itemHref);
return ZipFile("/sd" + filepath).readFileToStream(path.c_str(), out, chunkSize);
return ZipFile(filepath).readFileToStream(path.c_str(), out, chunkSize);
}
bool Epub::getItemSize(const std::string& itemHref, size_t* size) const {
const std::string path = FsHelpers::normalisePath(itemHref);
return ZipFile("/sd" + filepath).getInflatedFileSize(path.c_str(), size);
return ZipFile(filepath).getInflatedFileSize(path.c_str(), size);
}
int Epub::getSpineItemsCount() const {

View File

@ -122,7 +122,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
// LUTs complete
// Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
ZipFile zip("/sd" + epubPath);
ZipFile zip(epubPath);
// Pre-open zip file to speed up size calculations
if (!zip.open()) {
Serial.printf("[%lu] [BMC] Could not open EPUB zip for size calculations\n", millis());
@ -131,6 +131,14 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
tocFile.close();
return false;
}
if (!zip.loadAllLocalHeaderOffsets()) {
Serial.printf("[%lu] [BMC] Could not load zip local header offsets for size calculations\n", millis());
bookFile.close();
spineFile.close();
tocFile.close();
zip.close();
return false;
}
size_t cumSize = 0;
spineFile.seek(0);
for (int i = 0; i < spineCount; i++) {

View File

@ -1,5 +1,6 @@
#include "ZipFile.h"
#include <FsHelpers.h>
#include <HardwareSerial.h>
#include <miniz.h>
@ -27,27 +28,159 @@ bool inflateOneShot(const uint8_t* inputBuf, const size_t deflatedSize, uint8_t*
return true;
}
bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat) {
bool ZipFile::loadAllLocalHeaderOffsets() {
const bool wasOpen = isOpen();
if (!wasOpen) {
if (!open()) {
return false;
}
if (!wasOpen && !open()) {
return false;
}
// find the file
mz_uint32 fileIndex = 0;
if (!mz_zip_reader_locate_file_v2(zipArchivePtr.get(), filename, nullptr, 0, &fileIndex)) {
Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis(), filename);
if (!loadZipDetails()) {
Serial.printf("[%lu] [ZIP] loadAllLocalHeaderOffsets failed to load zip details\n", millis());
if (!wasOpen) {
close();
}
return false;
}
if (!mz_zip_reader_file_stat(zipArchivePtr.get(), fileIndex, fileStat)) {
Serial.printf("[%lu] [ZIP] mz_zip_reader_file_stat() failed! Error: %s\n", millis(),
mz_zip_get_error_string(zipArchivePtr->m_last_error));
file.seek(zipDetails.centralDirOffset);
uint32_t sig;
char itemName[256];
while (file.available()) {
file.read(reinterpret_cast<uint8_t*>(&sig), 4);
if (sig != 0x02014b50) break; // End of list
file.seek(24, SeekCur);
uint16_t nameLen, m, k;
file.read(reinterpret_cast<uint8_t*>(&nameLen), 2);
file.read(reinterpret_cast<uint8_t*>(&m), 2);
file.read(reinterpret_cast<uint8_t*>(&k), 2);
uint32_t localHeaderOffset;
file.seek(8, SeekCur);
file.read(reinterpret_cast<uint8_t*>(&localHeaderOffset), 4);
file.read(reinterpret_cast<uint8_t*>(itemName), nameLen);
itemName[nameLen] = '\0';
localHeaderOffsets.emplace(itemName, localHeaderOffset);
// Skip the rest of this entry (extra field + comment)
file.seek(m + k, SeekCur);
}
if (!wasOpen) {
close();
}
return true;
}
bool ZipFile::loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset) {
if (localHeaderOffsets.count(filename) > 0) {
*localHeaderOffset = localHeaderOffsets.at(filename);
Serial.printf("[%lu] [ZIP] Found cached local header offset for file: %s (LHO: %lu)\n", millis(), filename,
static_cast<unsigned long>(*localHeaderOffset));
return true;
}
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return false;
}
if (!loadZipDetails()) {
Serial.printf("[%lu] [ZIP] loadFileStatV2 failed to load zip details\n", millis());
if (!wasOpen) {
close();
}
return false;
}
file.seek(zipDetails.centralDirOffset);
uint32_t sig;
char itemName[256];
bool found = false;
while (file.available()) {
file.read(reinterpret_cast<uint8_t*>(&sig), 4);
if (sig != 0x02014b50) break; // End of list
file.seek(24, SeekCur);
uint16_t nameLen, m, k;
file.read(reinterpret_cast<uint8_t*>(&nameLen), 2);
file.read(reinterpret_cast<uint8_t*>(&m), 2);
file.read(reinterpret_cast<uint8_t*>(&k), 2);
file.seek(8, SeekCur);
file.read(reinterpret_cast<uint8_t*>(localHeaderOffset), 4);
file.read(reinterpret_cast<uint8_t*>(itemName), nameLen);
itemName[nameLen] = '\0';
Serial.printf("[%lu] [ZIP] Checking file in central dir: %s (LHO: %lu, LN: %d)\n", millis(), itemName,
static_cast<unsigned long>(*localHeaderOffset), nameLen);
if (strcmp(itemName, filename) == 0) {
found = true;
break;
}
// Skip the rest of this entry (extra field + comment)
file.seek(m + k, SeekCur);
}
if (!wasOpen) {
close();
}
return found;
}
bool ZipFile::loadFileStatSlim(const char* filename, FileStatSlim* fileStat) {
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return false;
}
if (!loadLocalHeaderOffset(filename, &fileStat->localHeaderOffset)) {
Serial.printf("[%lu] [ZIP] loadFileStatSlim could not find local header offset for file: %s\n", millis(), filename);
if (!wasOpen) {
close();
}
return false;
}
uint32_t sig;
file.seek(fileStat->localHeaderOffset);
file.read(reinterpret_cast<uint8_t*>(&sig), 4);
if (sig != 0x04034b50) {
Serial.printf("[%lu] [ZIP] Incorrect local file header\n", millis());
if (!wasOpen) {
close();
}
return false;
}
file.seek(4, SeekCur); // Skip to method
if (file.read(reinterpret_cast<uint8_t*>(&fileStat->method), 2) != 2) {
Serial.printf("[%lu] [ZIP] Could not read compression method\n", millis());
if (!wasOpen) {
close();
}
return false;
}
file.seek(8, SeekCur); // Skip to sizes
if (file.read(reinterpret_cast<uint8_t*>(&fileStat->compressedSize), 4) != 4) {
Serial.printf("[%lu] [ZIP] Could not read compressed size\n", millis());
if (!wasOpen) {
close();
}
return false;
}
if (file.read(reinterpret_cast<uint8_t*>(&fileStat->uncompressedSize), 4) != 4) {
Serial.printf("[%lu] [ZIP] Could not read uncompressed size\n", millis());
if (!wasOpen) {
close();
}
@ -60,20 +193,22 @@ bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileS
return true;
}
long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const {
long ZipFile::getDataOffset(const FileStatSlim& fileStat) {
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return -1;
}
constexpr auto localHeaderSize = 30;
uint8_t pLocalHeader[localHeaderSize];
const uint64_t fileOffset = fileStat.m_local_header_ofs;
const uint64_t fileOffset = fileStat.localHeaderOffset;
FILE* file = fopen(filePath.c_str(), "r");
if (!file) {
Serial.printf("[%lu] [ZIP] Failed to open file for reading local header\n", millis());
return -1;
file.seek(fileOffset);
const size_t read = file.read(pLocalHeader, localHeaderSize);
if (!wasOpen) {
close();
}
fseek(file, fileOffset, SEEK_SET);
const size_t read = fread(pLocalHeader, 1, localHeaderSize, file);
fclose(file);
if (read != localHeaderSize) {
Serial.printf("[%lu] [ZIP] Something went wrong reading the local header\n", millis());
@ -91,71 +226,133 @@ long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const {
return fileOffset + localHeaderSize + filenameLength + extraOffset;
}
bool ZipFile::open() {
zipArchivePtr.reset(new mz_zip_archive);
mz_zip_zero_struct(zipArchivePtr.get());
const bool status =
mz_zip_reader_init_file(zipArchivePtr.get(), filePath.c_str(), MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY);
bool ZipFile::loadZipDetails() {
if (zipDetails.isSet) {
return true;
}
if (!status) {
Serial.printf("[%lu] [ZIP] mz_zip_reader_init_file() failed for %s! Error: %s\n", millis(), filePath.c_str(),
mz_zip_get_error_string(zipArchivePtr->m_last_error));
zipArchivePtr.reset();
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return false;
}
const size_t fileSize = file.size();
if (fileSize < 22) {
Serial.printf("[%lu] [ZIP] File too small to be a valid zip\n", millis());
if (!wasOpen) {
close();
}
return false; // Minimum EOCD size is 22 bytes
}
// We scan the last 1KB (or the whole file if smaller) for the EOCD signature
// 0x06054b50 is stored as 0x50, 0x4b, 0x05, 0x06 in little-endian
const int scanRange = fileSize > 1024 ? 1024 : fileSize;
const auto buffer = static_cast<uint8_t*>(malloc(scanRange));
file.seek(fileSize - scanRange);
file.read(buffer, scanRange);
// Scan backwards for the signature
int foundOffset = -1;
for (int i = scanRange - 22; i >= 0; i--) {
constexpr uint32_t signature = 0x06054b50;
if (*reinterpret_cast<uint32_t*>(&buffer[i]) == signature) {
foundOffset = i;
break;
}
}
if (foundOffset == -1) {
Serial.printf("[%lu] [ZIP] EOCD signature not found in zip file\n", millis());
free(buffer);
if (!wasOpen) {
close();
}
return false;
}
// Now extract the values we need from the EOCD record
// Relative positions within EOCD:
// Offset 10: Total number of entries (2 bytes)
// Offset 16: Offset of start of central directory with respect to the starting disk number (4 bytes)
zipDetails.totalEntries = *reinterpret_cast<uint16_t*>(&buffer[foundOffset + 10]);
zipDetails.centralDirOffset = *reinterpret_cast<uint32_t*>(&buffer[foundOffset + 16]);
zipDetails.isSet = true;
free(buffer);
if (!wasOpen) {
close();
}
return true;
}
bool ZipFile::open() {
if (!FsHelpers::openFileForRead("ZIP", &filePath[3], file)) {
return false;
}
return true;
}
bool ZipFile::close() {
if (zipArchivePtr) {
mz_zip_reader_end(zipArchivePtr.get());
zipArchivePtr.reset();
if (file) {
file.close();
}
return true;
}
bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) {
mz_zip_archive_file_stat fileStat = {};
if (!loadFileStat(filename, &fileStat)) {
FileStatSlim fileStat = {};
if (!loadFileStatSlim(filename, &fileStat)) {
return false;
}
*size = static_cast<size_t>(fileStat.m_uncomp_size);
*size = static_cast<size_t>(fileStat.uncompressedSize);
return true;
}
uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) {
mz_zip_archive_file_stat fileStat;
if (!loadFileStat(filename, &fileStat)) {
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return nullptr;
}
FileStatSlim fileStat = {};
if (!loadFileStatSlim(filename, &fileStat)) {
if (!wasOpen) {
close();
}
return nullptr;
}
const long fileOffset = getDataOffset(fileStat);
if (fileOffset < 0) {
if (!wasOpen) {
close();
}
return nullptr;
}
FILE* file = fopen(filePath.c_str(), "rb");
if (!file) {
Serial.printf("[%lu] [ZIP] Failed to open file for reading\n", millis());
return nullptr;
}
fseek(file, fileOffset, SEEK_SET);
file.seek(fileOffset);
const auto deflatedDataSize = static_cast<size_t>(fileStat.m_comp_size);
const auto inflatedDataSize = static_cast<size_t>(fileStat.m_uncomp_size);
const auto deflatedDataSize = fileStat.compressedSize;
const auto inflatedDataSize = fileStat.uncompressedSize;
const auto dataSize = trailingNullByte ? inflatedDataSize + 1 : inflatedDataSize;
const auto data = static_cast<uint8_t*>(malloc(dataSize));
if (data == nullptr) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for output buffer (%zu bytes)\n", millis(), dataSize);
fclose(file);
if (!wasOpen) {
close();
}
return nullptr;
}
if (fileStat.m_method == MZ_NO_COMPRESSION) {
if (fileStat.method == MZ_NO_COMPRESSION) {
// no deflation, just read content
const size_t dataRead = fread(data, 1, inflatedDataSize, file);
fclose(file);
const size_t dataRead = file.read(data, inflatedDataSize);
if (!wasOpen) {
close();
}
if (dataRead != inflatedDataSize) {
Serial.printf("[%lu] [ZIP] Failed to read data\n", millis());
@ -164,17 +361,21 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
}
// Continue out of block with data set
} else if (fileStat.m_method == MZ_DEFLATED) {
} else if (fileStat.method == MZ_DEFLATED) {
// Read out deflated content from file
const auto deflatedData = static_cast<uint8_t*>(malloc(deflatedDataSize));
if (deflatedData == nullptr) {
Serial.printf("[%lu] [ZIP] Failed to allocate memory for decompression buffer\n", millis());
fclose(file);
if (!wasOpen) {
close();
}
return nullptr;
}
const size_t dataRead = fread(deflatedData, 1, deflatedDataSize, file);
fclose(file);
const size_t dataRead = file.read(deflatedData, deflatedDataSize);
if (!wasOpen) {
close();
}
if (dataRead != deflatedDataSize) {
Serial.printf("[%lu] [ZIP] Failed to read data, expected %d got %d\n", millis(), deflatedDataSize, dataRead);
@ -183,7 +384,7 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
return nullptr;
}
bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize);
const bool success = inflateOneShot(deflatedData, deflatedDataSize, data, inflatedDataSize);
free(deflatedData);
if (!success) {
@ -195,7 +396,9 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
// Continue out of block with data set
} else {
Serial.printf("[%lu] [ZIP] Unsupported compression method\n", millis());
fclose(file);
if (!wasOpen) {
close();
}
return nullptr;
}
@ -205,8 +408,13 @@ uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const boo
}
bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t chunkSize) {
mz_zip_archive_file_stat fileStat;
if (!loadFileStat(filename, &fileStat)) {
const bool wasOpen = isOpen();
if (!wasOpen && !open()) {
return false;
}
FileStatSlim fileStat = {};
if (!loadFileStatSlim(filename, &fileStat)) {
return false;
}
@ -222,10 +430,10 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
}
fseek(file, fileOffset, SEEK_SET);
const auto deflatedDataSize = static_cast<size_t>(fileStat.m_comp_size);
const auto inflatedDataSize = static_cast<size_t>(fileStat.m_uncomp_size);
const auto deflatedDataSize = fileStat.compressedSize;
const auto inflatedDataSize = fileStat.uncompressedSize;
if (fileStat.m_method == MZ_NO_COMPRESSION) {
if (fileStat.method == MZ_NO_COMPRESSION) {
// no deflation, just read content
const auto buffer = static_cast<uint8_t*>(malloc(chunkSize));
if (!buffer) {
@ -253,7 +461,7 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
return true;
}
if (fileStat.m_method == MZ_DEFLATED) {
if (fileStat.method == MZ_DEFLATED) {
// Setup inflator
const auto inflator = static_cast<tinfl_decompressor*>(malloc(sizeof(tinfl_decompressor)));
if (!inflator) {

View File

@ -1,25 +1,44 @@
#pragma once
#include <Print.h>
#include <FS.h>
#include <memory>
#include <map>
#include <string>
#include "miniz.h"
class ZipFile {
std::string filePath;
std::unique_ptr<mz_zip_archive> zipArchivePtr;
bool loadFileStat(const char* filename, mz_zip_archive_file_stat* fileStat);
long getDataOffset(const mz_zip_archive_file_stat& fileStat) const;
public:
struct FileStatSlim {
uint16_t method; // Compression method
uint32_t compressedSize; // Compressed size
uint32_t uncompressedSize; // Uncompressed size
uint32_t localHeaderOffset; // Offset of local file header
};
struct ZipDetails {
uint32_t centralDirOffset;
uint16_t totalEntries;
bool isSet;
};
private:
const std::string& filePath;
File file;
ZipDetails zipDetails = {0, 0, false};
std::map<std::string, uint32_t> localHeaderOffsets;
bool loadLocalHeaderOffset(const char* filename, uint32_t* localHeaderOffset);
bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat);
long getDataOffset(const FileStatSlim& fileStat);
bool loadZipDetails();
public:
explicit ZipFile(std::string filePath) : filePath(std::move(filePath)) {}
explicit ZipFile(const std::string& filePath) : filePath(filePath) {}
~ZipFile() = default;
// Zip file can be opened and closed by hand in order to allow for quick calculation of inflated file size
// It is NOT recommended to pre-open it for any kind of inflation due to memory constraints
bool isOpen() const { return zipArchivePtr != nullptr; }
bool isOpen() const { return !!file; }
bool open();
bool close();
bool loadAllLocalHeaderOffsets();
bool getInflatedFileSize(const char* filename, size_t* size);
// Due to the memory required to run each of these, it is recommended to not preopen the zip file for multiple
// These functions will open and close the zip as needed