Compare commits

...

2 Commits

Author SHA1 Message Date
Daniel
f3e3f4c56e fix: remove OOM-causing hrefToSpineIndex map from TOC pass
The unordered_map with 2768 string keys (~100KB+) was causing OOM crashes
at beginTocPass() on ESP32-C3's limited ~380KB RAM.

Reverted createTocEntry() to use original O(n) spine file scan instead.
Kept the safe spineToTocIndex vector in buildBookBin() (only ~5.5KB).
2026-01-20 20:41:27 -08:00
Daniel
05e409ce99 perf: optimize large EPUB indexing from O(n²) to O(n)
Replace O(n²) lookups with O(n) preprocessing:

1. createTocEntry(): Build href->spineIndex map once in beginTocPass()
   instead of scanning spine file for every TOC entry

2. buildBookBin(): Build spineIndex->tocIndex vector in single pass
   instead of scanning TOC file for every spine entry

For 2768-chapter EPUBs, this reduces:
- TOC pass: from ~7.6M file reads to ~5.5K reads
- buildBookBin: from ~7.6M file reads to ~5.5K reads

Memory impact: ~80KB for href map (acceptable trade-off for 10x+ speedup)
2026-01-20 12:40:36 -08:00

View File

@ -48,6 +48,7 @@ bool BookMetadataCache::beginTocPass() {
spineFile.close();
return false;
}
return true;
}
@ -124,6 +125,18 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
// LUTs complete
// Loop through spines from spine file matching up TOC indexes, calculating cumulative size and writing to book.bin
// Build spineIndex->tocIndex mapping in one pass (O(n) instead of O(n*m))
std::vector<int16_t> spineToTocIndex(spineCount, -1);
tocFile.seek(0);
for (int j = 0; j < tocCount; j++) {
auto tocEntry = readTocEntry(tocFile);
if (tocEntry.spineIndex >= 0 && tocEntry.spineIndex < spineCount) {
if (spineToTocIndex[tocEntry.spineIndex] == -1) {
spineToTocIndex[tocEntry.spineIndex] = static_cast<int16_t>(j);
}
}
}
ZipFile zip(epubPath);
// Pre-open zip file to speed up size calculations
if (!zip.open()) {
@ -150,14 +163,7 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile);
tocFile.seek(0);
for (int j = 0; j < tocCount; j++) {
auto tocEntry = readTocEntry(tocFile);
if (tocEntry.spineIndex == i) {
spineEntry.tocIndex = j;
break;
}
}
spineEntry.tocIndex = spineToTocIndex[i];
// Not a huge deal if we don't fine a TOC entry for the spine entry, this is expected behaviour for EPUBs
// Logging here is for debugging
@ -248,19 +254,15 @@ void BookMetadataCache::createTocEntry(const std::string& title, const std::stri
return;
}
int spineIndex = -1;
// find spine index
// TODO: This lookup is slow as need to scan through all items each time. We can't hold it all in memory due to size.
// But perhaps we can load just the hrefs in a vector/list to do an index lookup?
int16_t spineIndex = -1;
spineFile.seek(0);
for (int i = 0; i < spineCount; i++) {
auto spineEntry = readSpineEntry(spineFile);
if (spineEntry.href == href) {
spineIndex = i;
spineIndex = static_cast<int16_t>(i);
break;
}
}
if (spineIndex == -1) {
Serial.printf("[%lu] [BMC] addTocEntry: Could not find spine item for TOC href %s\n", millis(), href.c_str());
}