mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-05 23:27:38 +03:00
Three optimizations for EPUBs with many chapters (e.g. 2768 chapters): 1. OPF idref→href lookup: Build sorted hash index during manifest parsing, use binary search during spine resolution. Reduces ~4min to ~30-60s. 2. TOC href→spineIndex lookup: Build sorted hash index in beginTocPass(), use binary search in createTocEntry(). Reduces ~4min to ~30-60s. 3. ZIP central-dir cursor: Resume scanning from last position instead of restarting from beginning. Reduces ~8min to ~1-3min. All optimizations only activate for large EPUBs (≥400 spine items). Small books use unchanged code paths. Memory impact: ~33KB + ~39KB temporary during indexing, freed after. Expected total: ~17min → ~3-5min for Shadow Slave (2768 chapters). Also adds phase timing logs for performance measurement.
78 lines
2.0 KiB
C++
78 lines
2.0 KiB
C++
#pragma once
|
|
#include <Print.h>
|
|
|
|
#include <vector>
|
|
#include <algorithm>
|
|
|
|
#include "Epub.h"
|
|
#include "expat.h"
|
|
|
|
class BookMetadataCache;
|
|
|
|
class ContentOpfParser final : public Print {
|
|
enum ParserState {
|
|
START,
|
|
IN_PACKAGE,
|
|
IN_METADATA,
|
|
IN_BOOK_TITLE,
|
|
IN_BOOK_AUTHOR,
|
|
IN_BOOK_LANGUAGE,
|
|
IN_MANIFEST,
|
|
IN_SPINE,
|
|
IN_GUIDE,
|
|
};
|
|
|
|
const std::string& cachePath;
|
|
const std::string& baseContentPath;
|
|
size_t remainingSize;
|
|
XML_Parser parser = nullptr;
|
|
ParserState state = START;
|
|
BookMetadataCache* cache;
|
|
FsFile tempItemStore;
|
|
std::string coverItemId;
|
|
|
|
// Index for fast idref→href lookup (used only for large EPUBs)
|
|
struct ItemIndexEntry {
|
|
uint32_t idHash; // FNV-1a hash of itemId
|
|
uint16_t idLen; // length for collision reduction
|
|
uint32_t fileOffset; // offset in .items.bin
|
|
};
|
|
std::vector<ItemIndexEntry> itemIndex;
|
|
bool useItemIndex = false;
|
|
|
|
static constexpr uint16_t LARGE_SPINE_THRESHOLD = 400;
|
|
|
|
// FNV-1a hash function
|
|
static uint32_t fnvHash(const std::string& s) {
|
|
uint32_t hash = 2166136261u;
|
|
for (char c : s) {
|
|
hash ^= static_cast<uint8_t>(c);
|
|
hash *= 16777619u;
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
|
|
static void characterData(void* userData, const XML_Char* s, int len);
|
|
static void endElement(void* userData, const XML_Char* name);
|
|
|
|
public:
|
|
std::string title;
|
|
std::string author;
|
|
std::string language;
|
|
std::string tocNcxPath;
|
|
std::string tocNavPath; // EPUB 3 nav document path
|
|
std::string coverItemHref;
|
|
std::string textReferenceHref;
|
|
|
|
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
|
|
BookMetadataCache* cache)
|
|
: cachePath(cachePath), baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
|
|
~ContentOpfParser() override;
|
|
|
|
bool setup();
|
|
|
|
size_t write(uint8_t) override;
|
|
size_t write(const uint8_t* buffer, size_t size) override;
|
|
};
|