Remove tinyxml2 dependency and replace it with expat parsers

This commit is contained in:
Dave Allie 2025-12-13 19:30:00 +11:00
parent 6ddcf9b592
commit 9bf9013484
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F
17 changed files with 696 additions and 253 deletions

View File

@ -7,247 +7,148 @@
#include <map> #include <map>
#include "Epub/FsHelpers.h" #include "Epub/FsHelpers.h"
#include "Epub/parsers/ContainerParser.h"
#include "Epub/parsers/ContentOpfParser.h"
#include "Epub/parsers/TocNcxParser.h"
bool Epub::findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile) { bool Epub::findContentOpfFile(std::string* contentOpfFile) const {
// open up the meta data to find where the content.opf file lives const auto containerPath = "META-INF/container.xml";
size_t s; size_t containerSize;
const auto metaInfo = reinterpret_cast<char*>(zip.readFileToMemory("META-INF/container.xml", &s, true));
if (!metaInfo) { // Get file size without loading it all into heap
Serial.printf("[%lu] [EBP] Could not find META-INF/container.xml\n", millis()); if (!getItemSize(containerPath, &containerSize)) {
Serial.printf("[%lu] [EBP] Could not find or size META-INF/container.xml\n", millis());
return false; return false;
} }
// parse the meta data ContainerParser containerParser(containerSize);
tinyxml2::XMLDocument metaDataDoc;
const auto result = metaDataDoc.Parse(metaInfo);
free(metaInfo);
if (result != tinyxml2::XML_SUCCESS) { if (!containerParser.setup()) {
Serial.printf("[%lu] [EBP] Could not parse META-INF/container.xml. Error: %d\n", millis(), result);
return false; return false;
} }
const auto container = metaDataDoc.FirstChildElement("container"); // Stream read (reusing your existing stream logic)
if (!container) { if (!readItemContentsToStream(containerPath, containerParser, 512)) {
Serial.printf("[%lu] [EBP] Could not find container element in META-INF/container.xml\n", millis()); Serial.printf("[%lu] [EBP] Could not read META-INF/container.xml\n", millis());
containerParser.teardown();
return false; return false;
} }
const auto rootfiles = container->FirstChildElement("rootfiles"); // Extract the result
if (!rootfiles) { if (containerParser.fullPath.empty()) {
Serial.printf("[%lu] [EBP] Could not find rootfiles element in META-INF/container.xml\n", millis()); Serial.printf("[%lu] [EBP] Could not find valid rootfile in container.xml\n", millis());
containerParser.teardown();
return false; return false;
} }
// find the root file that has the media-type="application/oebps-package+xml" *contentOpfFile = std::move(containerParser.fullPath);
auto rootfile = rootfiles->FirstChildElement("rootfile");
while (rootfile) {
const char* mediaType = rootfile->Attribute("media-type");
if (mediaType && strcmp(mediaType, "application/oebps-package+xml") == 0) {
const char* full_path = rootfile->Attribute("full-path");
if (full_path) {
contentOpfFile = full_path;
return true;
}
}
rootfile = rootfile->NextSiblingElement("rootfile");
}
Serial.printf("[%lu] [EBP] Could not get path to content.opf file\n", millis()); containerParser.teardown();
return false;
}
bool Epub::parseContentOpf(ZipFile& zip, std::string& content_opf_file) {
// read in the content.opf file and parse it
auto contents = reinterpret_cast<char*>(zip.readFileToMemory(content_opf_file.c_str(), nullptr, true));
// parse the contents
tinyxml2::XMLDocument doc;
auto result = doc.Parse(contents);
free(contents);
if (result != tinyxml2::XML_SUCCESS) {
Serial.printf("[%lu] [EBP] Error parsing content.opf - %s\n", millis(),
tinyxml2::XMLDocument::ErrorIDToName(result));
return false;
}
auto package = doc.FirstChildElement("package");
if (!package) package = doc.FirstChildElement("opf:package");
if (!package) {
Serial.printf("[%lu] [EBP] Could not find package element in content.opf\n", millis());
return false;
}
// get the metadata - title and cover image
auto metadata = package->FirstChildElement("metadata");
if (!metadata) metadata = package->FirstChildElement("opf:metadata");
if (!metadata) {
Serial.printf("[%lu] [EBP] Missing metadata\n", millis());
return false;
}
auto titleEl = metadata->FirstChildElement("dc:title");
if (!titleEl) {
Serial.printf("[%lu] [EBP] Missing title\n", millis());
return false;
}
this->title = titleEl->GetText();
auto cover = metadata->FirstChildElement("meta");
if (!cover) cover = metadata->FirstChildElement("opf:meta");
while (cover && cover->Attribute("name") && strcmp(cover->Attribute("name"), "cover") != 0) {
cover = cover->NextSiblingElement("meta");
}
if (!cover) {
Serial.printf("[%lu] [EBP] Missing cover\n", millis());
}
auto coverItem = cover ? cover->Attribute("content") : nullptr;
// read the manifest and spine
// the manifest gives us the names of the files
// the spine gives us the order of the files
// we can then read the files in the order they are in the spine
auto manifest = package->FirstChildElement("manifest");
if (!manifest) manifest = package->FirstChildElement("opf:manifest");
if (!manifest) {
Serial.printf("[%lu] [EBP] Missing manifest\n", millis());
return false;
}
// create a mapping from id to file name
auto item = manifest->FirstChildElement("item");
if (!item) item = manifest->FirstChildElement("opf:item");
std::map<std::string, std::string> items;
while (item) {
std::string itemId = item->Attribute("id");
std::string href = contentBasePath + item->Attribute("href");
// grab the cover image
if (coverItem && itemId == coverItem) {
coverImageItem = href;
}
// grab the ncx file
if (itemId == "ncx" || itemId == "ncxtoc") {
tocNcxItem = href;
}
items[itemId] = href;
auto nextItem = item->NextSiblingElement("item");
if (!nextItem) nextItem = item->NextSiblingElement("opf:item");
item = nextItem;
}
// find the spine
auto spineEl = package->FirstChildElement("spine");
if (!spineEl) spineEl = package->FirstChildElement("opf:spine");
if (!spineEl) {
Serial.printf("[%lu] [EBP] Missing spine\n", millis());
return false;
}
// read the spine
auto itemref = spineEl->FirstChildElement("itemref");
if (!itemref) itemref = spineEl->FirstChildElement("opf:itemref");
while (itemref) {
auto id = itemref->Attribute("idref");
if (items.find(id) != items.end()) {
spine.emplace_back(id, items[id]);
}
auto nextItemRef = itemref->NextSiblingElement("itemref");
if (!nextItemRef) nextItemRef = itemref->NextSiblingElement("opf:itemref");
itemref = nextItemRef;
}
return true; return true;
} }
bool Epub::parseTocNcxFile(const ZipFile& zip) { bool Epub::parseContentOpf(const std::string& contentOpfFilePath) {
size_t contentOpfSize;
if (!getItemSize(contentOpfFilePath, &contentOpfSize)) {
Serial.printf("[%lu] [EBP] Could not get size of content.opf\n", millis());
return false;
}
ContentOpfParser opfParser(getBasePath(), contentOpfSize);
if (!opfParser.setup()) {
Serial.printf("[%lu] [EBP] Could not setup content.opf parser\n", millis());
return false;
}
if (!readItemContentsToStream(contentOpfFilePath, opfParser, 1024)) {
Serial.printf("[%lu] [EBP] Could not read content.opf\n", millis());
opfParser.teardown();
return false;
}
// Grab data from opfParser into epub
title = opfParser.title;
if (opfParser.items.count("ncx")) {
tocNcxItem = opfParser.items.at("ncx");
} else if (opfParser.items.count("ncxtoc")) {
tocNcxItem = opfParser.items.at("ncxtoc");
}
for (auto& spineRef : opfParser.spineRefs) {
if (opfParser.items.count(spineRef)) {
spine.emplace_back(spineRef, opfParser.items.at(spineRef));
}
}
Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
opfParser.teardown();
return true;
}
bool Epub::parseTocNcxFile() {
// the ncx file should have been specified in the content.opf file // the ncx file should have been specified in the content.opf file
if (tocNcxItem.empty()) { if (tocNcxItem.empty()) {
Serial.printf("[%lu] [EBP] No ncx file specified\n", millis()); Serial.printf("[%lu] [EBP] No ncx file specified\n", millis());
return false; return false;
} }
const auto ncxData = reinterpret_cast<char*>(zip.readFileToMemory(tocNcxItem.c_str(), nullptr, true)); size_t tocSize;
if (!ncxData) { if (!getItemSize(tocNcxItem, &tocSize)) {
Serial.printf("[%lu] [EBP] Could not find %s\n", millis(), tocNcxItem.c_str()); Serial.printf("[%lu] [EBP] Could not get size of toc ncx\n", millis());
return false; return false;
} }
// Parse the Toc contents TocNcxParser ncxParser(contentBasePath, tocSize);
tinyxml2::XMLDocument doc;
const auto result = doc.Parse(ncxData);
free(ncxData);
if (result != tinyxml2::XML_SUCCESS) { if (!ncxParser.setup()) {
Serial.printf("[%lu] [EBP] Error parsing toc %s\n", millis(), tinyxml2::XMLDocument::ErrorIDToName(result)); Serial.printf("[%lu] [EBP] Could not setup toc ncx parser\n", millis());
return false; return false;
} }
const auto ncx = doc.FirstChildElement("ncx"); if (!readItemContentsToStream(tocNcxItem, ncxParser, 1024)) {
if (!ncx) { Serial.printf("[%lu] [EBP] Could not read toc ncx stream\n", millis());
Serial.printf("[%lu] [EBP] Could not find first child ncx in toc\n", millis()); ncxParser.teardown();
return false; return false;
} }
const auto navMap = ncx->FirstChildElement("navMap"); this->toc = std::move(ncxParser.toc);
if (!navMap) {
Serial.printf("[%lu] [EBP] Could not find navMap child in ncx\n", millis());
return false;
}
recursivelyParseNavMap(navMap->FirstChildElement("navPoint")); Serial.printf("[%lu] [EBP] Parsed %d TOC items\n", millis(), this->toc.size());
ncxParser.teardown();
return true; return true;
} }
// Walks a chain of sibling <navPoint> elements and, depth-first, their nested
// <navPoint> children, appending one EpubTocEntry to `toc` per navPoint that
// carries a <content src="..."> attribute.
//
// Malformed input is tolerated instead of dereferencing null:
//   - a missing/empty <navLabel><text> yields an empty title;
//   - a navPoint without <content src> is skipped, but its children are still visited.
//
// Every entry is stored with level 0, as before.
//
// @param element first <navPoint> of the sibling chain to process; may be null.
void Epub::recursivelyParseNavMap(tinyxml2::XMLElement* element) {
  while (element) {
    // Title: <navLabel><text>...</text></navLabel>; every hop may be absent.
    std::string navTitle;
    if (const auto navLabel = element->FirstChildElement("navLabel")) {
      if (const auto textEl = navLabel->FirstChildElement("text")) {
        if (const char* label = textEl->GetText()) {
          navTitle = label;
        }
      }
    }

    const auto content = element->FirstChildElement("content");
    const char* src = content ? content->Attribute("src") : nullptr;
    if (src) {
      std::string href = contentBasePath + src;

      // split the href on the # to get the href and the anchor
      std::string anchor;
      const size_t pos = href.find('#');
      if (pos != std::string::npos) {
        anchor = href.substr(pos + 1);
        href.erase(pos);
      }

      toc.emplace_back(std::move(navTitle), std::move(href), std::move(anchor), 0);
    } else {
      Serial.printf("[%lu] [EBP] Skipping navPoint without content src\n", millis());
    }

    // Nested navPoints are appended right after their parent.
    if (tinyxml2::XMLElement* nestedNavPoint = element->FirstChildElement("navPoint")) {
      recursivelyParseNavMap(nestedNavPoint);
    }

    element = element->NextSiblingElement("navPoint");
  }
}
// load in the meta data for the epub file // load in the meta data for the epub file
bool Epub::load() { bool Epub::load() {
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
ZipFile zip("/sd" + filepath); ZipFile zip("/sd" + filepath);
std::string contentOpfFile; std::string contentOpfFilePath;
if (!findContentOpfFile(zip, contentOpfFile)) { if (!findContentOpfFile(&contentOpfFilePath)) {
Serial.printf("[%lu] [EBP] Could not open ePub\n", millis()); Serial.printf("[%lu] [EBP] Could not find content.opf in zip\n", millis());
return false; return false;
} }
contentBasePath = contentOpfFile.substr(0, contentOpfFile.find_last_of('/') + 1); Serial.printf("[%lu] [EBP] Found content.opf at: %s\n", millis(), contentOpfFilePath.c_str());
if (!parseContentOpf(zip, contentOpfFile)) { contentBasePath = contentOpfFilePath.substr(0, contentOpfFilePath.find_last_of('/') + 1);
if (!parseContentOpf(contentOpfFilePath)) {
Serial.printf("[%lu] [EBP] Could not parse content.opf\n", millis());
return false; return false;
} }
if (!parseTocNcxFile(zip)) { if (!parseTocNcxFile()) {
Serial.printf("[%lu] [EBP] Could not parse toc\n", millis());
return false; return false;
} }
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true; return true;
} }
@ -344,6 +245,13 @@ bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, con
return zip.readFileToStream(path.c_str(), out, chunkSize); return zip.readFileToStream(path.c_str(), out, chunkSize);
} }
bool Epub::getItemSize(const std::string& itemHref, size_t* size) const {
const ZipFile zip("/sd" + filepath);
const std::string path = normalisePath(itemHref);
return zip.getInflatedFileSize(path.c_str(), size);
}
int Epub::getSpineItemsCount() const { return spine.size(); } int Epub::getSpineItemsCount() const { return spine.size(); }
std::string& Epub::getSpineItem(const int spineIndex) { std::string& Epub::getSpineItem(const int spineIndex) {

View File

@ -1,22 +1,13 @@
#pragma once #pragma once
#include <Print.h> #include <Print.h>
#include <tinyxml2.h>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
class ZipFile; #include "Epub/EpubTocEntry.h"
class EpubTocEntry { class ZipFile;
public:
std::string title;
std::string href;
std::string anchor;
int level;
EpubTocEntry(std::string title, std::string href, std::string anchor, const int level)
: title(std::move(title)), href(std::move(href)), anchor(std::move(anchor)), level(level) {}
};
class Epub { class Epub {
// the title read from the EPUB meta data // the title read from the EPUB meta data
@ -36,11 +27,9 @@ class Epub {
// Uniq cache key based on filepath // Uniq cache key based on filepath
std::string cachePath; std::string cachePath;
// find the path for the content.opf file bool findContentOpfFile(std::string* contentOpfFile) const;
static bool findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile); bool parseContentOpf(const std::string& contentOpfFilePath);
bool parseContentOpf(ZipFile& zip, std::string& content_opf_file); bool parseTocNcxFile();
bool parseTocNcxFile(const ZipFile& zip);
void recursivelyParseNavMap(tinyxml2::XMLElement* element);
public: public:
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
@ -59,6 +48,7 @@ class Epub {
uint8_t* readItemContentsToBytes(const std::string& itemHref, size_t* size = nullptr, uint8_t* readItemContentsToBytes(const std::string& itemHref, size_t* size = nullptr,
bool trailingNullByte = false) const; bool trailingNullByte = false) const;
bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const; bool readItemContentsToStream(const std::string& itemHref, Print& out, size_t chunkSize) const;
bool getItemSize(const std::string& itemHref, size_t* size) const;
std::string& getSpineItem(int spineIndex); std::string& getSpineItem(int spineIndex);
int getSpineItemsCount() const; int getSpineItemsCount() const;
EpubTocEntry& getTocItem(int tocTndex); EpubTocEntry& getTocItem(int tocTndex);

View File

@ -0,0 +1,13 @@
#pragma once
#include <string>
// A single table-of-contents entry: a display title plus the location it points at.
class EpubTocEntry {
 public:
  // Human-readable label of the entry.
  std::string title;
  // Target document path (without any "#fragment" part when the producer split it off).
  std::string href;
  // Fragment identifier within `href`; empty when there is none.
  std::string anchor;
  // Level value supplied by whoever builds the entry (presumably nesting depth - confirm with the parser).
  int level;

  // Takes the strings by value and moves them into place, so callers can pass
  // temporaries or std::move their own strings without an extra copy.
  EpubTocEntry(std::string title, std::string href, std::string anchor, const int level)
      : title{std::move(title)}, href{std::move(href)}, anchor{std::move(anchor)}, level{level} {}
};

View File

@ -5,9 +5,9 @@
#include <fstream> #include <fstream>
#include "EpubHtmlParserSlim.h"
#include "FsHelpers.h" #include "FsHelpers.h"
#include "Page.h" #include "Page.h"
#include "parsers/ChapterHtmlSlimParser.h"
constexpr uint8_t SECTION_FILE_VERSION = 4; constexpr uint8_t SECTION_FILE_VERSION = 4;
@ -127,9 +127,9 @@ bool Section::persistPageDataToSD(const int fontId, const float lineCompression,
const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath; const auto sdTmpHtmlPath = "/sd" + tmpHtmlPath;
EpubHtmlParserSlim visitor(sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight, ChapterHtmlSlimParser visitor(sdTmpHtmlPath.c_str(), renderer, fontId, lineCompression, marginTop, marginRight,
marginBottom, marginLeft, marginBottom, marginLeft,
[this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); }); [this](std::unique_ptr<Page> page) { this->onPageComplete(std::move(page)); });
success = visitor.parseAndBuildPages(); success = visitor.parseAndBuildPages();
SD.remove(tmpHtmlPath.c_str()); SD.remove(tmpHtmlPath.c_str());

View File

@ -1,11 +1,11 @@
#include "EpubHtmlParserSlim.h" #include "ChapterHtmlSlimParser.h"
#include <GfxRenderer.h> #include <GfxRenderer.h>
#include <HardwareSerial.h> #include <HardwareSerial.h>
#include <expat.h> #include <expat.h>
#include "Page.h" #include "../Page.h"
#include "htmlEntities.h" #include "../htmlEntities.h"
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"}; const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]); constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
@ -38,7 +38,7 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
} }
// start a new text block if needed // start a new text block if needed
void EpubHtmlParserSlim::startNewTextBlock(const TextBlock::BLOCK_STYLE style) { void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::BLOCK_STYLE style) {
if (currentTextBlock) { if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it // already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) { if (currentTextBlock->isEmpty()) {
@ -51,8 +51,8 @@ void EpubHtmlParserSlim::startNewTextBlock(const TextBlock::BLOCK_STYLE style) {
currentTextBlock.reset(new ParsedText(style)); currentTextBlock.reset(new ParsedText(style));
} }
void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<EpubHtmlParserSlim*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
(void)atts; (void)atts;
// Middle of skip // Middle of skip
@ -62,23 +62,7 @@ void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* na
} }
if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) { if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) {
// const char* src = element.Attribute("src"); // TODO: Start processing image tags
// if (src) {
// // don't leave an empty text block in the list
// // const BLOCK_STYLE style = currentTextBlock->get_style();
// if (currentTextBlock->isEmpty()) {
// delete currentTextBlock;
// currentTextBlock = nullptr;
// }
// // TODO: Fix this
// // blocks.push_back(new ImageBlock(m_base_path + src));
// // start a new text block - with the same style as before
// // startNewTextBlock(style);
// } else {
// // ESP_LOGE(TAG, "Could not find src attribute");
// }
// start skip
self->skipUntilDepth = self->depth; self->skipUntilDepth = self->depth;
self->depth += 1; self->depth += 1;
return; return;
@ -109,8 +93,8 @@ void XMLCALL EpubHtmlParserSlim::startElement(void* userData, const XML_Char* na
self->depth += 1; self->depth += 1;
} }
void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s, const int len) { void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char* s, const int len) {
auto* self = static_cast<EpubHtmlParserSlim*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
// Middle of skip // Middle of skip
if (self->skipUntilDepth < self->depth) { if (self->skipUntilDepth < self->depth) {
@ -149,8 +133,8 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
} }
} }
void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name) { void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<EpubHtmlParserSlim*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
(void)name; (void)name;
if (self->partWordBufferIndex > 0) { if (self->partWordBufferIndex > 0) {
@ -196,7 +180,7 @@ void XMLCALL EpubHtmlParserSlim::endElement(void* userData, const XML_Char* name
} }
} }
bool EpubHtmlParserSlim::parseAndBuildPages() { bool ChapterHtmlSlimParser::parseAndBuildPages() {
startNewTextBlock(TextBlock::JUSTIFIED); startNewTextBlock(TextBlock::JUSTIFIED);
const XML_Parser parser = XML_ParserCreate(nullptr); const XML_Parser parser = XML_ParserCreate(nullptr);
@ -261,7 +245,7 @@ bool EpubHtmlParserSlim::parseAndBuildPages() {
return true; return true;
} }
void EpubHtmlParserSlim::makePages() { void ChapterHtmlSlimParser::makePages() {
if (!currentTextBlock) { if (!currentTextBlock) {
Serial.printf("[%lu] [EHP] !! No text block to make pages for !!\n", millis()); Serial.printf("[%lu] [EHP] !! No text block to make pages for !!\n", millis());
return; return;

View File

@ -6,15 +6,15 @@
#include <functional> #include <functional>
#include <memory> #include <memory>
#include "ParsedText.h" #include "../ParsedText.h"
#include "blocks/TextBlock.h" #include "../blocks/TextBlock.h"
class Page; class Page;
class GfxRenderer; class GfxRenderer;
#define MAX_WORD_SIZE 200 #define MAX_WORD_SIZE 200
class EpubHtmlParserSlim { class ChapterHtmlSlimParser {
const char* filepath; const char* filepath;
GfxRenderer& renderer; GfxRenderer& renderer;
std::function<void(std::unique_ptr<Page>)> completePageFn; std::function<void(std::unique_ptr<Page>)> completePageFn;
@ -44,10 +44,10 @@ class EpubHtmlParserSlim {
static void XMLCALL endElement(void* userData, const XML_Char* name); static void XMLCALL endElement(void* userData, const XML_Char* name);
public: public:
explicit EpubHtmlParserSlim(const char* filepath, GfxRenderer& renderer, const int fontId, explicit ChapterHtmlSlimParser(const char* filepath, GfxRenderer& renderer, const int fontId,
const float lineCompression, const int marginTop, const int marginRight, const float lineCompression, const int marginTop, const int marginRight,
const int marginBottom, const int marginLeft, const int marginBottom, const int marginLeft,
const std::function<void(std::unique_ptr<Page>)>& completePageFn) const std::function<void(std::unique_ptr<Page>)>& completePageFn)
: filepath(filepath), : filepath(filepath),
renderer(renderer), renderer(renderer),
fontId(fontId), fontId(fontId),
@ -57,6 +57,6 @@ class EpubHtmlParserSlim {
marginBottom(marginBottom), marginBottom(marginBottom),
marginLeft(marginLeft), marginLeft(marginLeft),
completePageFn(completePageFn) {} completePageFn(completePageFn) {}
~EpubHtmlParserSlim() = default; ~ChapterHtmlSlimParser() = default;
bool parseAndBuildPages(); bool parseAndBuildPages();
}; };

View File

@ -0,0 +1,96 @@
#include "ContainerParser.h"
#include <HardwareSerial.h>
// Creates the expat parser and registers this object's element callbacks.
// Returns false (after logging) when expat cannot allocate a parser.
bool ContainerParser::setup() {
  parser = XML_ParserCreate(nullptr);

  if (parser) {
    // The callbacks are static; `this` travels to them as expat user data.
    XML_SetUserData(parser, this);
    XML_SetElementHandler(parser, startElement, endElement);
    return true;
  }

  Serial.printf("[%lu] [CTR] Couldn't allocate memory for parser\n", millis());
  return false;
}
// Frees the expat parser if one exists. Safe to call repeatedly; always returns true.
bool ContainerParser::teardown() {
  if (!parser) {
    return true;
  }

  XML_ParserFree(parser);
  parser = nullptr;
  return true;
}
// Single-byte overload: forwards to the buffer overload with a length of one.
size_t ContainerParser::write(const uint8_t data) {
  const uint8_t* const singleByte = &data;
  return write(singleByte, 1);
}
size_t ContainerParser::write(const uint8_t* buffer, const size_t size) {
if (!parser) return 0;
const uint8_t* currentBufferPos = buffer;
auto remainingInBuffer = size;
while (remainingInBuffer > 0) {
void* const buf = XML_GetBuffer(parser, 1024);
if (!buf) {
Serial.printf("[%lu] [CTR] Couldn't allocate buffer\n", millis());
return 0;
}
const auto toRead = remainingInBuffer < 1024 ? remainingInBuffer : 1024;
memcpy(buf, currentBufferPos, toRead);
if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
Serial.printf("[%lu] [CTR] Parse error: %s\n", millis(), XML_ErrorString(XML_GetErrorCode(parser)));
return 0;
}
currentBufferPos += toRead;
remainingInBuffer -= toRead;
remainingSize -= toRead;
}
return size;
}
// expat start-tag callback for META-INF/container.xml.
//
// Tracks the container > rootfiles > rootfile nesting and records the `full-path`
// of the FIRST <rootfile> whose media-type is "application/oebps-package+xml".
// Later matching rootfiles are ignored, so a container that lists several
// renditions resolves to the first one (previously the last match silently won).
//
// @param userData the ContainerParser registered via XML_SetUserData
// @param name     element name as reported by expat
// @param atts     null-terminated array of alternating attribute names and values
void XMLCALL ContainerParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
  auto* self = static_cast<ContainerParser*>(userData);

  switch (self->state) {
    case START:
      if (strcmp(name, "container") == 0) self->state = IN_CONTAINER;
      return;

    case IN_CONTAINER:
      if (strcmp(name, "rootfiles") == 0) self->state = IN_ROOTFILES;
      return;

    case IN_ROOTFILES: {
      if (strcmp(name, "rootfile") != 0) return;
      // A package path has already been captured: keep the first one.
      if (!self->fullPath.empty()) return;

      const char* mediaType = nullptr;
      const char* path = nullptr;
      for (int i = 0; atts[i]; i += 2) {
        if (strcmp(atts[i], "media-type") == 0) {
          mediaType = atts[i + 1];
        } else if (strcmp(atts[i], "full-path") == 0) {
          path = atts[i + 1];
        }
      }

      // Check if this is the standard OEBPS package
      if (mediaType && path && strcmp(mediaType, "application/oebps-package+xml") == 0) {
        self->fullPath = path;
      }
      return;
    }

    default:
      return;
  }
}
// expat end-tag callback: steps the state machine back out one level when the
// element that opened the current state is closed.
void XMLCALL ContainerParser::endElement(void* userData, const XML_Char* name) {
  auto* self = static_cast<ContainerParser*>(userData);

  switch (self->state) {
    case IN_ROOTFILES:
      if (strcmp(name, "rootfiles") == 0) self->state = IN_CONTAINER;
      break;

    case IN_CONTAINER:
      if (strcmp(name, "container") == 0) self->state = START;
      break;

    default:
      break;
  }
}

View File

@ -0,0 +1,32 @@
#pragma once
#include <Print.h>
#include <string>
#include "expat.h"
class ContainerParser final : public Print {
enum ParserState {
START,
IN_CONTAINER,
IN_ROOTFILES,
};
size_t remainingSize;
XML_Parser parser = nullptr;
ParserState state = START;
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void endElement(void* userData, const XML_Char* name);
public:
std::string fullPath;
explicit ContainerParser(const size_t xmlSize) : remainingSize(xmlSize) {}
bool setup();
bool teardown();
size_t write(uint8_t) override;
size_t write(const uint8_t* buffer, size_t size) override;
};

View File

@ -0,0 +1,161 @@
#include "ContentOpfParser.h"
#include <HardwareSerial.h>
#include <ZipFile.h>
// Creates the expat parser and wires up the element and character-data callbacks.
// Returns false (after logging) when expat cannot allocate a parser.
bool ContentOpfParser::setup() {
  parser = XML_ParserCreate(nullptr);

  if (parser) {
    // Static callbacks recover the instance from the expat user-data pointer.
    XML_SetUserData(parser, this);
    XML_SetElementHandler(parser, startElement, endElement);
    XML_SetCharacterDataHandler(parser, characterData);
    return true;
  }

  Serial.printf("[%lu] [COF] Couldn't allocate memory for parser\n", millis());
  return false;
}
// Frees the expat parser if one exists. Safe to call repeatedly; always returns true.
bool ContentOpfParser::teardown() {
  if (!parser) {
    return true;
  }

  XML_ParserFree(parser);
  parser = nullptr;
  return true;
}
// Single-byte overload: forwards to the buffer overload with a length of one.
size_t ContentOpfParser::write(const uint8_t data) {
  const uint8_t* const singleByte = &data;
  return write(singleByte, 1);
}
// Feeds `size` bytes from `buffer` into the expat parser, at most 1024 bytes per
// XML_ParseBuffer call.
//
// `remainingSize` starts as the document size given to the constructor; the chunk
// that exhausts it is passed to expat as the final one. If the stream delivers more
// bytes than were declared, the counter is clamped at zero instead of wrapping
// around (which would have meant the document was never finalised).
//
// On a buffer-allocation or parse error the parser is released and nulled, so any
// further write() calls return 0 immediately.
//
// @return `size` on success, 0 if there is no parser or an error occurred.
size_t ContentOpfParser::write(const uint8_t* buffer, const size_t size) {
  if (!parser) return 0;

  constexpr size_t kChunkSize = 1024;
  const uint8_t* cursor = buffer;
  size_t pending = size;

  while (pending > 0) {
    void* const dest = XML_GetBuffer(parser, static_cast<int>(kChunkSize));
    if (!dest) {
      Serial.printf("[%lu] [COF] Couldn't allocate memory for buffer\n", millis());
      XML_ParserFree(parser);
      parser = nullptr;
      return 0;
    }

    const size_t chunk = pending < kChunkSize ? pending : kChunkSize;
    memcpy(dest, cursor, chunk);

    // `<=` rather than `==`: a stream longer than declared must still finalise.
    const bool isFinal = remainingSize <= chunk;
    if (XML_ParseBuffer(parser, static_cast<int>(chunk), isFinal) == XML_STATUS_ERROR) {
      Serial.printf("[%lu] [COF] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
                    XML_ErrorString(XML_GetErrorCode(parser)));
      XML_ParserFree(parser);
      parser = nullptr;
      return 0;
    }

    cursor += chunk;
    pending -= chunk;
    remainingSize = isFinal ? 0 : remainingSize - chunk;
  }

  return size;
}
// expat start-tag callback for the OPF package document.
//
// Drives the package > {metadata, manifest, spine} state machine. Element names
// are accepted both bare and with the "opf:" prefix.
//   - metadata: only the FIRST non-empty <dc:title> is captured; entering the title
//     state again for further <dc:title> elements (subtitles, alternate titles)
//     would concatenate them all into `title`.
//   - manifest: each <item> with both a non-empty `id` and an `href` is stored in
//     `items` as id -> baseContentPath + href; incomplete items are logged and
//     skipped instead of being inserted under an empty key or with an empty href.
//   - spine: the `idref` of each <itemref> is appended to `spineRefs` in order.
//
// @param userData the ContentOpfParser registered via XML_SetUserData
// @param name     element name as reported by expat
// @param atts     null-terminated array of alternating attribute names and values
void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
  auto* self = static_cast<ContentOpfParser*>(userData);

  const auto tagIs = [name](const char* plain, const char* prefixed) {
    return strcmp(name, plain) == 0 || strcmp(name, prefixed) == 0;
  };

  switch (self->state) {
    case START:
      if (tagIs("package", "opf:package")) self->state = IN_PACKAGE;
      return;

    case IN_PACKAGE:
      if (tagIs("metadata", "opf:metadata")) {
        self->state = IN_METADATA;
      } else if (tagIs("manifest", "opf:manifest")) {
        self->state = IN_MANIFEST;
      } else if (tagIs("spine", "opf:spine")) {
        self->state = IN_SPINE;
      }
      return;

    case IN_METADATA:
      // TODO: Support book cover (<meta name="cover" content="..."/>)
      if (strcmp(name, "dc:title") == 0 && self->title.empty()) {
        self->state = IN_BOOK_TITLE;
      }
      return;

    case IN_MANIFEST: {
      if (!tagIs("item", "opf:item")) return;

      const char* itemId = nullptr;
      const char* href = nullptr;
      for (int i = 0; atts[i]; i += 2) {
        if (strcmp(atts[i], "id") == 0) {
          itemId = atts[i + 1];
        } else if (strcmp(atts[i], "href") == 0) {
          href = atts[i + 1];
        }
      }

      if (itemId && *itemId && href) {
        self->items[itemId] = self->baseContentPath + href;
      } else {
        Serial.printf("[%lu] [COF] Skipping manifest item without id or href\n", millis());
      }
      return;
    }

    case IN_SPINE:
      if (!tagIs("itemref", "opf:itemref")) return;
      for (int i = 0; atts[i]; i += 2) {
        if (strcmp(atts[i], "idref") == 0) {
          self->spineRefs.emplace_back(atts[i + 1]);
          break;
        }
      }
      return;

    default:
      return;
  }
}
// expat character-data callback: accumulates text into `title` while inside
// <dc:title>. expat may deliver one text run in several pieces, hence append.
void XMLCALL ContentOpfParser::characterData(void* userData, const XML_Char* s, const int len) {
  auto* self = static_cast<ContentOpfParser*>(userData);

  if (self->state != IN_BOOK_TITLE) {
    return;
  }

  self->title.append(s, len);
}
// expat end-tag callback: when the element that opened the current state closes,
// fall back to the parent state. Names are matched bare or with the "opf:" prefix
// (dc:title is matched only in its prefixed form).
void XMLCALL ContentOpfParser::endElement(void* userData, const XML_Char* name) {
  auto* self = static_cast<ContentOpfParser*>(userData);

  const auto closes = [name](const char* plain, const char* prefixed) {
    return strcmp(name, plain) == 0 || strcmp(name, prefixed) == 0;
  };

  switch (self->state) {
    case IN_SPINE:
      if (closes("spine", "opf:spine")) self->state = IN_PACKAGE;
      break;

    case IN_MANIFEST:
      if (closes("manifest", "opf:manifest")) self->state = IN_PACKAGE;
      break;

    case IN_BOOK_TITLE:
      if (strcmp(name, "dc:title") == 0) self->state = IN_METADATA;
      break;

    case IN_METADATA:
      if (closes("metadata", "opf:metadata")) self->state = IN_PACKAGE;
      break;

    case IN_PACKAGE:
      if (closes("package", "opf:package")) self->state = START;
      break;

    default:
      break;
  }
}

View File

@ -0,0 +1,42 @@
#pragma once
#include <Print.h>
#include <map>
#include "Epub.h"
#include "expat.h"
class ContentOpfParser final : public Print {
enum ParserState {
START,
IN_PACKAGE,
IN_METADATA,
IN_BOOK_TITLE,
IN_MANIFEST,
IN_SPINE,
};
const std::string& baseContentPath;
size_t remainingSize;
XML_Parser parser = nullptr;
ParserState state = START;
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void characterData(void* userData, const XML_Char* s, int len);
static void endElement(void* userData, const XML_Char* name);
public:
std::string title;
std::string tocNcxPath;
std::map<std::string, std::string> items;
std::vector<std::string> spineRefs;
explicit ContentOpfParser(const std::string& baseContentPath, const size_t xmlSize)
: baseContentPath(baseContentPath), remainingSize(xmlSize) {}
bool setup();
bool teardown();
size_t write(uint8_t) override;
size_t write(const uint8_t* buffer, size_t size) override;
};

View File

@ -0,0 +1,165 @@
#include "TocNcxParser.h"
#include <HardwareSerial.h>
// Creates the expat parser and wires up the element and character-data callbacks.
// Returns false (after logging) when expat cannot allocate a parser.
bool TocNcxParser::setup() {
  parser = XML_ParserCreate(nullptr);

  if (parser) {
    // Static callbacks recover the instance from the expat user-data pointer.
    XML_SetUserData(parser, this);
    XML_SetElementHandler(parser, startElement, endElement);
    XML_SetCharacterDataHandler(parser, characterData);
    return true;
  }

  Serial.printf("[%lu] [TOC] Couldn't allocate memory for parser\n", millis());
  return false;
}
// Frees the expat parser if one exists. Safe to call repeatedly; always returns true.
bool TocNcxParser::teardown() {
  if (!parser) {
    return true;
  }

  XML_ParserFree(parser);
  parser = nullptr;
  return true;
}
// Single-byte overload: forwards to the buffer overload with a length of one.
size_t TocNcxParser::write(const uint8_t data) {
  const uint8_t* const singleByte = &data;
  return write(singleByte, 1);
}
size_t TocNcxParser::write(const uint8_t* buffer, const size_t size) {
if (!parser) return 0;
const uint8_t* currentBufferPos = buffer;
auto remainingInBuffer = size;
while (remainingInBuffer > 0) {
void* const buf = XML_GetBuffer(parser, 1024);
if (!buf) {
Serial.printf("[%lu] [TOC] Couldn't allocate memory for buffer\n", millis());
return 0;
}
const auto toRead = remainingInBuffer < 1024 ? remainingInBuffer : 1024;
memcpy(buf, currentBufferPos, toRead);
if (XML_ParseBuffer(parser, static_cast<int>(toRead), remainingSize == toRead) == XML_STATUS_ERROR) {
Serial.printf("[%lu] [TOC] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser),
XML_ErrorString(XML_GetErrorCode(parser)));
return 0;
}
currentBufferPos += toRead;
remainingInBuffer -= toRead;
remainingSize -= toRead;
}
return size;
}
// expat start-tag callback: advances the parser state machine and captures <content src="...">.
void XMLCALL TocNcxParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
  // NOTE: A navPoint's label and content must appear before any nested navPoints.
  // Supported layout:
  // <navPoint>
  //   <navLabel><text>Chapter 1</text></navLabel>
  //   <content src="ch1.html"/>
  //   <navPoint> ...nested... </navPoint>
  // </navPoint>
  //
  // Unsupported layout (opening the nested navPoint clears the pending label/src):
  // <navPoint>
  //   <navPoint> ...nested... </navPoint>
  //   <navLabel><text>Chapter 1</text></navLabel>
  //   <content src="ch1.html"/>
  // </navPoint>
  auto* self = static_cast<TocNcxParser*>(userData);

  const auto nameIs = [name](const char* tag) { return strcmp(name, tag) == 0; };

  // Entering a navPoint (top-level or nested): go one level deeper and reset pending data.
  const auto openNavPoint = [self]() {
    self->state = IN_NAV_POINT;
    self->currentDepth++;
    self->currentLabel.clear();
    self->currentSrc.clear();
  };

  switch (self->state) {
    case START:
      if (nameIs("ncx")) {
        self->state = IN_NCX;
      }
      break;

    case IN_NCX:
      if (nameIs("navMap")) {
        self->state = IN_NAV_MAP;
      }
      break;

    case IN_NAV_MAP:
      if (nameIs("navPoint")) {
        openNavPoint();
      }
      break;

    case IN_NAV_POINT:
      if (nameIs("navPoint")) {
        openNavPoint();
      } else if (nameIs("navLabel")) {
        self->state = IN_NAV_LABEL;
      } else if (nameIs("content")) {
        // atts is a null-terminated list of alternating name/value strings.
        for (const XML_Char** attr = atts; *attr; attr += 2) {
          if (strcmp(attr[0], "src") == 0) {
            self->currentSrc = attr[1];
            break;
          }
        }
      }
      break;

    case IN_NAV_LABEL:
      if (nameIs("text")) {
        self->state = IN_NAV_LABEL_TEXT;
      }
      break;

    default:
      break;
  }
}
// expat character-data callback: accumulates text into the pending label, but only while
// the parser is inside a navLabel's <text> element. `s` is not null-terminated; `len` bounds it.
void XMLCALL TocNcxParser::characterData(void* userData, const XML_Char* s, const int len) {
  auto* self = static_cast<TocNcxParser*>(userData);
  if (self->state != IN_NAV_LABEL_TEXT) {
    return;
  }
  self->currentLabel.append(s, len);
}
// expat end-tag callback: unwinds the state machine and, on </content>, emits a TOC entry
// from the label and src collected for the current navPoint.
void XMLCALL TocNcxParser::endElement(void* userData, const XML_Char* name) {
  auto* self = static_cast<TocNcxParser*>(userData);

  switch (self->state) {
    case IN_NAV_LABEL_TEXT:
      if (strcmp(name, "text") == 0) {
        self->state = IN_NAV_LABEL;
      }
      return;

    case IN_NAV_LABEL:
      if (strcmp(name, "navLabel") == 0) {
        self->state = IN_NAV_POINT;
      }
      return;

    case IN_NAV_POINT:
      break;

    default:
      return;
  }

  // From here on the state is IN_NAV_POINT.
  if (strcmp(name, "navPoint") == 0) {
    self->currentDepth--;
    // Only leaving the outermost navPoint returns us to the navMap level.
    if (self->currentDepth == 0) {
      self->state = IN_NAV_MAP;
    }
    return;
  }

  if (strcmp(name, "content") != 0) {
    return;
  }

  // The end of <content> is where the entry is emitted: this relies on <navLabel> having
  // been seen earlier in the same navPoint, so both label and src should be available now.
  if (self->currentLabel.empty() || self->currentSrc.empty()) {
    return;
  }

  std::string href = self->baseContentPath + self->currentSrc;
  std::string anchor;
  const size_t hashPos = href.find('#');
  if (hashPos != std::string::npos) {
    // Split "path#fragment" into the file path and the fragment identifier.
    anchor = href.substr(hashPos + 1);
    href = href.substr(0, hashPos);
  }

  self->toc.emplace_back(self->currentLabel, href, anchor, self->currentDepth);

  // Reset so an unexpected second <content> in the same navPoint cannot add a duplicate.
  self->currentLabel.clear();
  self->currentSrc.clear();
}

View File

@ -0,0 +1,37 @@
#pragma once
#include <Print.h>
#include <string>
#include <vector>
#include "Epub/EpubTocEntry.h"
#include "expat.h"
class TocNcxParser final : public Print {
enum ParserState { START, IN_NCX, IN_NAV_MAP, IN_NAV_POINT, IN_NAV_LABEL, IN_NAV_LABEL_TEXT, IN_CONTENT };
const std::string& baseContentPath;
size_t remainingSize;
XML_Parser parser = nullptr;
ParserState state = START;
std::string currentLabel;
std::string currentSrc;
size_t currentDepth = 0;
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void characterData(void* userData, const XML_Char* s, int len);
static void endElement(void* userData, const XML_Char* name);
public:
std::vector<EpubTocEntry> toc;
explicit TocNcxParser(const std::string& baseContentPath, const size_t xmlSize)
: baseContentPath(baseContentPath), remainingSize(xmlSize) {}
bool setup();
bool teardown();
size_t write(uint8_t) override;
size_t write(const uint8_t* buffer, size_t size) override;
};

View File

@ -162,9 +162,7 @@ int GfxRenderer::getLineHeight(const int fontId) const {
return fontMap.at(fontId).getData(REGULAR)->advanceY; return fontMap.at(fontId).getData(REGULAR)->advanceY;
} }
uint8_t *GfxRenderer::getFrameBuffer() const { uint8_t* GfxRenderer::getFrameBuffer() const { return einkDisplay.getFrameBuffer(); }
return einkDisplay.getFrameBuffer();
}
void GfxRenderer::swapBuffers() const { einkDisplay.swapBuffers(); } void GfxRenderer::swapBuffers() const { einkDisplay.swapBuffers(); }

View File

@ -40,7 +40,7 @@ bool ZipFile::loadFileStat(const char* filename, mz_zip_archive_file_stat* fileS
// find the file // find the file
mz_uint32 fileIndex = 0; mz_uint32 fileIndex = 0;
if (!mz_zip_reader_locate_file_v2(&zipArchive, filename, nullptr, 0, &fileIndex)) { if (!mz_zip_reader_locate_file_v2(&zipArchive, filename, nullptr, 0, &fileIndex)) {
Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis, filename); Serial.printf("[%lu] [ZIP] Could not find file %s\n", millis(), filename);
mz_zip_reader_end(&zipArchive); mz_zip_reader_end(&zipArchive);
return false; return false;
} }
@ -82,6 +82,16 @@ long ZipFile::getDataOffset(const mz_zip_archive_file_stat& fileStat) const {
return fileOffset + localHeaderSize + filenameLength + extraOffset; return fileOffset + localHeaderSize + filenameLength + extraOffset;
} }
bool ZipFile::getInflatedFileSize(const char* filename, size_t* size) const {
mz_zip_archive_file_stat fileStat;
if (!loadFileStat(filename, &fileStat)) {
return false;
}
*size = static_cast<size_t>(fileStat.m_uncomp_size);
return true;
}
uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) const { uint8_t* ZipFile::readFileToMemory(const char* filename, size_t* size, const bool trailingNullByte) const {
mz_zip_archive_file_stat fileStat; mz_zip_archive_file_stat fileStat;
if (!loadFileStat(filename, &fileStat)) { if (!loadFileStat(filename, &fileStat)) {
@ -268,7 +278,14 @@ bool ZipFile::readFileToStream(const char* filename, Print& out, const size_t ch
// Write output chunk // Write output chunk
if (outBytes > 0) { if (outBytes > 0) {
processedOutputBytes += outBytes; processedOutputBytes += outBytes;
out.write(outputBuffer + outputCursor, outBytes); if (out.write(outputBuffer + outputCursor, outBytes) != outBytes) {
Serial.printf("[%lu] [ZIP] Failed to write all output bytes to stream\n", millis());
fclose(file);
free(outputBuffer);
free(fileReadBuffer);
free(inflator);
return false;
}
// Update output position in buffer (with wraparound) // Update output position in buffer (with wraparound)
outputCursor = (outputCursor + outBytes) & (TINFL_LZ_DICT_SIZE - 1); outputCursor = (outputCursor + outBytes) & (TINFL_LZ_DICT_SIZE - 1);
} }

View File

@ -14,6 +14,7 @@ class ZipFile {
public: public:
explicit ZipFile(std::string filePath) : filePath(std::move(filePath)) {} explicit ZipFile(std::string filePath) : filePath(std::move(filePath)) {}
~ZipFile() = default; ~ZipFile() = default;
bool getInflatedFileSize(const char* filename, size_t* size) const;
uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false) const; uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false) const;
bool readFileToStream(const char* filename, Print& out, size_t chunkSize) const; bool readFileToStream(const char* filename, Print& out, size_t chunkSize) const;
}; };

View File

@ -29,7 +29,6 @@ board_build.partitions = partitions.csv
; Libraries ; Libraries
lib_deps = lib_deps =
https://github.com/leethomason/tinyxml2.git#11.0.0
BatteryMonitor=symlink://open-x4-sdk/libs/hardware/BatteryMonitor BatteryMonitor=symlink://open-x4-sdk/libs/hardware/BatteryMonitor
InputManager=symlink://open-x4-sdk/libs/hardware/InputManager InputManager=symlink://open-x4-sdk/libs/hardware/InputManager
EInkDisplay=symlink://open-x4-sdk/libs/display/EInkDisplay EInkDisplay=symlink://open-x4-sdk/libs/display/EInkDisplay

View File

@ -163,7 +163,7 @@ void EpubReaderScreen::renderScreen() {
const int w = textWidth + margin * 2; const int w = textWidth + margin * 2;
const int h = renderer.getLineHeight(READER_FONT_ID) + margin * 2; const int h = renderer.getLineHeight(READER_FONT_ID) + margin * 2;
renderer.grayscaleRevert(); renderer.grayscaleRevert();
uint8_t *fb1 = renderer.getFrameBuffer(); uint8_t* fb1 = renderer.getFrameBuffer();
renderer.swapBuffers(); renderer.swapBuffers();
memcpy(fb1, renderer.getFrameBuffer(), EInkDisplay::BUFFER_SIZE); memcpy(fb1, renderer.getFrameBuffer(), EInkDisplay::BUFFER_SIZE);
renderer.fillRect(x, y, w, h, 0); renderer.fillRect(x, y, w, h, 0);