mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2025-12-16 22:27:42 +03:00
* Move to smart pointers and split out ParsedText class * Cleanup ParsedText * Fix clearCache functions and clear section cache if page load fails * Bump Page and Section file versions * Combine removeDir implementations in Epub * Adjust screen margins
397 lines
12 KiB
C++
397 lines
12 KiB
C++
#include "Epub.h"
|
|
|
|
#include <HardwareSerial.h>
|
|
#include <SD.h>
|
|
#include <ZipFile.h>
|
|
|
|
#include <map>
|
|
|
|
#include "Epub/FsHelpers.h"
|
|
|
|
bool Epub::findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile) {
|
|
// open up the meta data to find where the content.opf file lives
|
|
size_t s;
|
|
const auto metaInfo = reinterpret_cast<char*>(zip.readFileToMemory("META-INF/container.xml", &s, true));
|
|
if (!metaInfo) {
|
|
Serial.printf("[%lu] [EBP] Could not find META-INF/container.xml\n", millis());
|
|
return false;
|
|
}
|
|
|
|
// parse the meta data
|
|
tinyxml2::XMLDocument metaDataDoc;
|
|
const auto result = metaDataDoc.Parse(metaInfo);
|
|
free(metaInfo);
|
|
|
|
if (result != tinyxml2::XML_SUCCESS) {
|
|
Serial.printf("[%lu] [EBP] Could not parse META-INF/container.xml. Error: %d\n", millis(), result);
|
|
return false;
|
|
}
|
|
|
|
const auto container = metaDataDoc.FirstChildElement("container");
|
|
if (!container) {
|
|
Serial.printf("[%lu] [EBP] Could not find container element in META-INF/container.xml\n", millis());
|
|
return false;
|
|
}
|
|
|
|
const auto rootfiles = container->FirstChildElement("rootfiles");
|
|
if (!rootfiles) {
|
|
Serial.printf("[%lu] [EBP] Could not find rootfiles element in META-INF/container.xml\n", millis());
|
|
return false;
|
|
}
|
|
|
|
// find the root file that has the media-type="application/oebps-package+xml"
|
|
auto rootfile = rootfiles->FirstChildElement("rootfile");
|
|
while (rootfile) {
|
|
const char* mediaType = rootfile->Attribute("media-type");
|
|
if (mediaType && strcmp(mediaType, "application/oebps-package+xml") == 0) {
|
|
const char* full_path = rootfile->Attribute("full-path");
|
|
if (full_path) {
|
|
contentOpfFile = full_path;
|
|
return true;
|
|
}
|
|
}
|
|
rootfile = rootfile->NextSiblingElement("rootfile");
|
|
}
|
|
|
|
Serial.printf("[%lu] [EBP] Could not get path to content.opf file\n", millis());
|
|
return false;
|
|
}
|
|
|
|
// Parses the package document (content.opf) and records the book's metadata.
//
// On success this sets `title`, may set `coverImageItem` and `tocNcxItem`, and appends one
// (idref, href) pair to `spine` for every <itemref> whose idref is present in the manifest.
// Each element is looked up by its plain name first and by its "opf:"-prefixed name as a fallback.
//
// zip              - archive to read the package document from
// content_opf_file - path of the package document inside the archive
//
// Returns false (after logging the reason to Serial) when the file cannot be read or parsed, or
// when the package, metadata, title, manifest or spine is missing. A missing cover is only logged.
bool Epub::parseContentOpf(ZipFile& zip, std::string& content_opf_file) {
  // read in the content.opf file and parse it
  auto contents = reinterpret_cast<char*>(zip.readFileToMemory(content_opf_file.c_str(), nullptr, true));
  if (!contents) {
    Serial.printf("[%lu] [EBP] Could not find %s\n", millis(), content_opf_file.c_str());
    return false;
  }

  tinyxml2::XMLDocument doc;
  auto result = doc.Parse(contents);
  free(contents);

  if (result != tinyxml2::XML_SUCCESS) {
    Serial.printf("[%lu] [EBP] Error parsing content.opf - %s\n", millis(),
                  tinyxml2::XMLDocument::ErrorIDToName(result));
    return false;
  }

  auto package = doc.FirstChildElement("package");
  if (!package) package = doc.FirstChildElement("opf:package");
  if (!package) {
    Serial.printf("[%lu] [EBP] Could not find package element in content.opf\n", millis());
    return false;
  }

  // get the metadata - title and cover image
  auto metadata = package->FirstChildElement("metadata");
  if (!metadata) metadata = package->FirstChildElement("opf:metadata");
  if (!metadata) {
    Serial.printf("[%lu] [EBP] Missing metadata\n", millis());
    return false;
  }

  // GetText() returns null for an empty <dc:title/>, and assigning a null char* to a
  // std::string is undefined behaviour, so an empty title is treated like a missing one.
  auto titleEl = metadata->FirstChildElement("dc:title");
  const char* titleText = titleEl ? titleEl->GetText() : nullptr;
  if (!titleText) {
    Serial.printf("[%lu] [EBP] Missing title\n", millis());
    return false;
  }
  this->title = titleText;

  // Find the <meta name="cover" content="..."/> entry. <meta> elements without a name attribute
  // must be skipped rather than mistaken for the cover, and the tag name that matched first
  // (plain or "opf:"-prefixed) is reused when advancing to the next sibling.
  const char* metaTag = "meta";
  auto cover = metadata->FirstChildElement(metaTag);
  if (!cover) {
    metaTag = "opf:meta";
    cover = metadata->FirstChildElement(metaTag);
  }
  while (cover) {
    const char* metaName = cover->Attribute("name");
    if (metaName && strcmp(metaName, "cover") == 0) {
      break;
    }
    cover = cover->NextSiblingElement(metaTag);
  }
  if (!cover) {
    Serial.printf("[%lu] [EBP] Missing cover\n", millis());
  }
  // id of the manifest item that holds the cover image, or null when no cover was declared
  auto coverItem = cover ? cover->Attribute("content") : nullptr;

  // read the manifest and spine
  // the manifest gives us the names of the files
  // the spine gives us the order of the files
  // we can then read the files in the order they are in the spine
  auto manifest = package->FirstChildElement("manifest");
  if (!manifest) manifest = package->FirstChildElement("opf:manifest");
  if (!manifest) {
    Serial.printf("[%lu] [EBP] Missing manifest\n", millis());
    return false;
  }

  // create a mapping from id to file name
  auto item = manifest->FirstChildElement("item");
  if (!item) item = manifest->FirstChildElement("opf:item");
  std::map<std::string, std::string> items;

  while (item) {
    const char* idAttr = item->Attribute("id");
    const char* hrefAttr = item->Attribute("href");

    // Items lacking an id or href cannot be referenced from the spine, so they are ignored
    // instead of building a std::string from a null pointer.
    if (idAttr && hrefAttr) {
      const std::string itemId = idAttr;
      const std::string href = contentBasePath + hrefAttr;

      // grab the cover image
      if (coverItem && itemId == coverItem) {
        coverImageItem = href;
      }

      // grab the ncx file
      if (itemId == "ncx" || itemId == "ncxtoc") {
        tocNcxItem = href;
      }

      items[itemId] = href;
    }

    auto nextItem = item->NextSiblingElement("item");
    if (!nextItem) nextItem = item->NextSiblingElement("opf:item");
    item = nextItem;
  }

  // find the spine
  auto spineEl = package->FirstChildElement("spine");
  if (!spineEl) spineEl = package->FirstChildElement("opf:spine");
  if (!spineEl) {
    Serial.printf("[%lu] [EBP] Missing spine\n", millis());
    return false;
  }

  // read the spine
  auto itemref = spineEl->FirstChildElement("itemref");
  if (!itemref) itemref = spineEl->FirstChildElement("opf:itemref");
  while (itemref) {
    const char* idref = itemref->Attribute("idref");
    if (idref) {
      // single lookup; unknown idrefs are silently dropped
      const auto found = items.find(idref);
      if (found != items.end()) {
        spine.emplace_back(idref, found->second);
      }
    }

    auto nextItemRef = itemref->NextSiblingElement("itemref");
    if (!nextItemRef) nextItemRef = itemref->NextSiblingElement("opf:itemref");
    itemref = nextItemRef;
  }

  return true;
}
|
|
|
|
bool Epub::parseTocNcxFile(const ZipFile& zip) {
|
|
// the ncx file should have been specified in the content.opf file
|
|
if (tocNcxItem.empty()) {
|
|
Serial.printf("[%lu] [EBP] No ncx file specified\n", millis());
|
|
return false;
|
|
}
|
|
|
|
const auto ncxData = reinterpret_cast<char*>(zip.readFileToMemory(tocNcxItem.c_str(), nullptr, true));
|
|
if (!ncxData) {
|
|
Serial.printf("[%lu] [EBP] Could not find %s\n", millis(), tocNcxItem.c_str());
|
|
return false;
|
|
}
|
|
|
|
// Parse the Toc contents
|
|
tinyxml2::XMLDocument doc;
|
|
const auto result = doc.Parse(ncxData);
|
|
free(ncxData);
|
|
|
|
if (result != tinyxml2::XML_SUCCESS) {
|
|
Serial.printf("[%lu] [EBP] Error parsing toc %s\n", millis(), tinyxml2::XMLDocument::ErrorIDToName(result));
|
|
return false;
|
|
}
|
|
|
|
const auto ncx = doc.FirstChildElement("ncx");
|
|
if (!ncx) {
|
|
Serial.printf("[%lu] [EBP] Could not find first child ncx in toc\n", millis());
|
|
return false;
|
|
}
|
|
|
|
const auto navMap = ncx->FirstChildElement("navMap");
|
|
if (!navMap) {
|
|
Serial.printf("[%lu] [EBP] Could not find navMap child in ncx\n", millis());
|
|
return false;
|
|
}
|
|
|
|
recursivelyParseNavMap(navMap->FirstChildElement("navPoint"));
|
|
return true;
|
|
}
|
|
|
|
// Appends one `toc` entry for `element` and each of its following <navPoint> siblings, descending
// into nested <navPoint> children right after their parent so entries end up in document order.
//
// element - first <navPoint> to process; may be null, in which case nothing happens
//
// The <content src="..."> value is prefixed with `contentBasePath` and split at the first '#'
// into href and anchor. A navPoint without a usable <content src> is skipped (its children are
// still visited); a navPoint without label text gets an empty title.
// NOTE(review): the last argument passed to emplace_back is always 0, even for nested
// navPoints — confirm whether it is meant to carry the nesting depth.
void Epub::recursivelyParseNavMap(tinyxml2::XMLElement* element) {
  while (element) {
    // <navLabel><text>Title</text></navLabel> - every step may be absent in a malformed file,
    // so each pointer is checked before it is dereferenced.
    std::string navTitle;
    const auto navLabel = element->FirstChildElement("navLabel");
    const auto labelText = navLabel ? navLabel->FirstChildElement("text") : nullptr;
    const auto labelNode = labelText ? labelText->FirstChild() : nullptr;
    if (labelNode && labelNode->Value()) {
      navTitle = labelNode->Value();
    }

    const auto content = element->FirstChildElement("content");
    const char* src = content ? content->Attribute("src") : nullptr;
    if (src) {
      std::string href = contentBasePath + src;

      // split the href on the # to get the href and the anchor
      std::string anchor;
      const size_t pos = href.find('#');
      if (pos != std::string::npos) {
        anchor = href.substr(pos + 1);
        href = href.substr(0, pos);
      }

      toc.emplace_back(navTitle, href, anchor, 0);
    } else {
      Serial.printf("[%lu] [EBP] Skipping navPoint without content src\n", millis());
    }

    tinyxml2::XMLElement* nestedNavPoint = element->FirstChildElement("navPoint");
    if (nestedNavPoint) {
      recursivelyParseNavMap(nestedNavPoint);
    }

    element = element->NextSiblingElement("navPoint");
  }
}
|
|
|
|
// load in the meta data for the epub file
|
|
bool Epub::load() {
|
|
ZipFile zip("/sd" + filepath);
|
|
|
|
std::string contentOpfFile;
|
|
if (!findContentOpfFile(zip, contentOpfFile)) {
|
|
Serial.printf("[%lu] [EBP] Could not open ePub\n", millis());
|
|
return false;
|
|
}
|
|
|
|
contentBasePath = contentOpfFile.substr(0, contentOpfFile.find_last_of('/') + 1);
|
|
|
|
if (!parseContentOpf(zip, contentOpfFile)) {
|
|
return false;
|
|
}
|
|
|
|
if (!parseTocNcxFile(zip)) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Epub::clearCache() const {
|
|
if (!SD.exists(cachePath.c_str())) {
|
|
Serial.printf("[%lu] [EPB] Cache does not exist, no action needed\n", millis());
|
|
return true;
|
|
}
|
|
|
|
if (!FsHelpers::removeDir(cachePath.c_str())) {
|
|
Serial.printf("[%lu] [EPB] Failed to clear cache\n", millis());
|
|
return false;
|
|
}
|
|
|
|
Serial.printf("[%lu] [EPB] Cache cleared successfully\n", millis());
|
|
return true;
|
|
}
|
|
|
|
// Ensures the cache directory (`cachePath`) exists on the SD card, creating every intermediate
// directory along the way. Does nothing when the directory is already present.
void Epub::setupCacheDir() const {
  if (!SD.exists(cachePath.c_str())) {
    // Create each parent in turn: every prefix that ends just before a '/' (the search starts at
    // index 1, so a leading '/' is never treated as a separator).
    size_t slash = cachePath.find('/', 1);
    while (slash != std::string::npos) {
      SD.mkdir(cachePath.substr(0, slash).c_str());
      slash = cachePath.find('/', slash + 1);
    }

    // Finally the cache directory itself.
    SD.mkdir(cachePath.c_str());
  }
}
|
|
|
|
// Path of this book's cache directory (the one setupCacheDir() creates and clearCache() removes).
const std::string& Epub::getCachePath() const {
  return cachePath;
}
|
|
|
|
// Path of the epub file as stored in `filepath`; the archive itself is opened at "/sd" + this path.
const std::string& Epub::getPath() const {
  return filepath;
}
|
|
|
|
// Book title, taken from the <dc:title> element when content.opf is parsed.
const std::string& Epub::getTitle() const {
  return title;
}
|
|
|
|
// Href of the manifest item identified as the cover image while parsing content.opf
// (content base path + the item's href attribute).
const std::string& Epub::getCoverImageItem() const {
  return coverImageItem;
}
|
|
|
|
// Normalises a '/'-separated archive path.
//
// - empty components (leading, trailing or repeated '/') are dropped
// - "." components are dropped
// - ".." removes the preceding component; a ".." with nothing left to remove is discarded,
//   so the result never climbs above the root
//
// The result has no leading or trailing '/'. Examples:
//   "OEBPS/text/../images/cover.jpg" -> "OEBPS/images/cover.jpg"
//   "a/b/.."                         -> "a"
//   "a/./b"                          -> "a/b"
std::string normalisePath(const std::string& path) {
  std::vector<std::string> components;
  std::string component;

  // Applies the component collected so far to `components` and resets it. Shared between the
  // separator case and the end of input so a final ".." or "." is resolved like any other.
  const auto commit = [&components, &component]() {
    if (component == "..") {
      if (!components.empty()) {
        components.pop_back();
      }
    } else if (!component.empty() && component != ".") {
      components.push_back(component);
    }
    component.clear();
  };

  for (const char c : path) {
    if (c == '/') {
      commit();
    } else {
      component += c;
    }
  }
  commit();

  std::string result;
  for (const auto& part : components) {
    if (!result.empty()) {
      result += '/';
    }
    result += part;
  }

  return result;
}
|
|
|
|
uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, bool trailingNullByte) const {
|
|
const ZipFile zip("/sd" + filepath);
|
|
const std::string path = normalisePath(itemHref);
|
|
|
|
const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte);
|
|
if (!content) {
|
|
Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str());
|
|
return nullptr;
|
|
}
|
|
|
|
return content;
|
|
}
|
|
|
|
bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
|
|
const ZipFile zip("/sd" + filepath);
|
|
const std::string path = normalisePath(itemHref);
|
|
|
|
return zip.readFileToStream(path.c_str(), out, chunkSize);
|
|
}
|
|
|
|
// Number of entries in the spine (reading order).
int Epub::getSpineItemsCount() const {
  const auto count = spine.size();
  return count;
}
|
|
|
|
std::string& Epub::getSpineItem(const int spineIndex) {
|
|
if (spineIndex < 0 || spineIndex >= spine.size()) {
|
|
Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex);
|
|
return spine.at(0).second;
|
|
}
|
|
|
|
return spine.at(spineIndex).second;
|
|
}
|
|
|
|
// Returns the table-of-contents entry at `tocTndex`.
//
// An out-of-range index is logged and answered with the first entry. When the table of contents
// is empty there is no first entry to fall back on, so a reference to a blank placeholder entry
// is returned instead of calling toc.at(0), which would fail on an empty vector.
EpubTocEntry& Epub::getTocItem(const int tocTndex) {
  if (toc.empty()) {
    // Built with the same argument shapes that toc.emplace_back() is given elsewhere in this file.
    std::string blank;
    static EpubTocEntry placeholder(blank, blank, blank, 0);
    Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocTndex);
    return placeholder;
  }

  // The negative check comes first so the cast below never sees a negative value.
  if (tocTndex < 0 || static_cast<size_t>(tocTndex) >= toc.size()) {
    Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocTndex);
    return toc.front();
  }

  return toc[static_cast<size_t>(tocTndex)];
}
|
|
|
|
// Number of entries in the table of contents.
int Epub::getTocItemsCount() const {
  const auto count = toc.size();
  return count;
}
|
|
|
|
// work out the section index for a toc index
|
|
int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
|
|
// the toc entry should have an href that matches the spine item
|
|
// so we can find the spine index by looking for the href
|
|
for (int i = 0; i < spine.size(); i++) {
|
|
if (spine[i].second == toc[tocIndex].href) {
|
|
return i;
|
|
}
|
|
}
|
|
|
|
Serial.printf("[%lu] [EBP] Section not found\n", millis());
|
|
// not found - default to the start of the book
|
|
return 0;
|
|
}
|
|
|
|
int Epub::getTocIndexForSpineIndex(const int spineIndex) const {
|
|
// the toc entry should have an href that matches the spine item
|
|
// so we can find the toc index by looking for the href
|
|
for (int i = 0; i < toc.size(); i++) {
|
|
if (toc[i].href == spine[spineIndex].second) {
|
|
return i;
|
|
}
|
|
}
|
|
|
|
Serial.printf("[%lu] [EBP] TOC item not found\n", millis());
|
|
// not found - default to first item
|
|
return 0;
|
|
}
|