Xteink-X4-crosspoint-reader/lib/Epub/Epub.cpp

#include "Epub.h"

#include <HardwareSerial.h>
#include <SD.h>
#include <ZipFile.h>

#include <map>

#include "Epub/FsHelpers.h"

bool Epub::findContentOpfFile(const ZipFile& zip, std::string& contentOpfFile) {
  // open up the meta data to find where the content.opf file lives
  size_t s;
  const auto metaInfo = reinterpret_cast<char*>(zip.readFileToMemory("META-INF/container.xml", &s, true));
  if (!metaInfo) {
    Serial.printf("[%lu] [EBP] Could not find META-INF/container.xml\n", millis());
    return false;
  }

  // parse the meta data
  tinyxml2::XMLDocument metaDataDoc;
  const auto result = metaDataDoc.Parse(metaInfo);
  free(metaInfo);

  if (result != tinyxml2::XML_SUCCESS) {
    Serial.printf("[%lu] [EBP] Could not parse META-INF/container.xml. Error: %d\n", millis(), result);
    return false;
  }

  const auto container = metaDataDoc.FirstChildElement("container");
  if (!container) {
    Serial.printf("[%lu] [EBP] Could not find container element in META-INF/container.xml\n", millis());
    return false;
  }

  const auto rootfiles = container->FirstChildElement("rootfiles");
  if (!rootfiles) {
    Serial.printf("[%lu] [EBP] Could not find rootfiles element in META-INF/container.xml\n", millis());
    return false;
  }

  // find the root file that has the media-type="application/oebps-package+xml"
  auto rootfile = rootfiles->FirstChildElement("rootfile");
  while (rootfile) {
    const char* mediaType = rootfile->Attribute("media-type");
    if (mediaType && strcmp(mediaType, "application/oebps-package+xml") == 0) {
      const char* full_path = rootfile->Attribute("full-path");
      if (full_path) {
        contentOpfFile = full_path;
        return true;
      }
    }
    rootfile = rootfile->NextSiblingElement("rootfile");
  }

  Serial.printf("[%lu] [EBP] Could not get path to content.opf file\n", millis());
  return false;
}

/**
 * Parses the package document (content.opf) and fills in the book metadata.
 *
 * On success this sets `title`, and - when present in the document -
 * `coverImageItem` and `tocNcxItem`, and appends one (id, href) pair to `spine`
 * for every <itemref> whose idref resolves to a manifest item. Manifest hrefs
 * are prefixed with `contentBasePath`, so that member must be set beforehand.
 *
 * Elements are looked up both unprefixed and with the "opf:" prefix.
 *
 * @param zip               archive containing the package document
 * @param content_opf_file  archive path of the package document
 * @return false if the file cannot be read or parsed, or if the package,
 *         metadata, dc:title, manifest or spine element is missing; true otherwise
 */
bool Epub::parseContentOpf(ZipFile& zip, std::string& content_opf_file) {
  // read in the content.opf file and parse it
  auto contents = reinterpret_cast<char*>(zip.readFileToMemory(content_opf_file.c_str(), nullptr, true));
  if (!contents) {
    // Report the real cause instead of letting it surface as a parse error.
    Serial.printf("[%lu] [EBP] Could not find %s\n", millis(), content_opf_file.c_str());
    return false;
  }

  // parse the contents
  tinyxml2::XMLDocument doc;
  auto result = doc.Parse(contents);
  free(contents);

  if (result != tinyxml2::XML_SUCCESS) {
    Serial.printf("[%lu] [EBP] Error parsing content.opf - %s\n", millis(),
                  tinyxml2::XMLDocument::ErrorIDToName(result));
    return false;
  }

  auto package = doc.FirstChildElement("package");
  if (!package) package = doc.FirstChildElement("opf:package");

  if (!package) {
    Serial.printf("[%lu] [EBP] Could not find package element in content.opf\n", millis());
    return false;
  }

  // get the metadata - title and cover image
  auto metadata = package->FirstChildElement("metadata");
  if (!metadata) metadata = package->FirstChildElement("opf:metadata");
  if (!metadata) {
    Serial.printf("[%lu] [EBP] Missing metadata\n", millis());
    return false;
  }

  auto titleEl = metadata->FirstChildElement("dc:title");
  if (!titleEl) {
    Serial.printf("[%lu] [EBP] Missing title\n", millis());
    return false;
  }
  // GetText() yields nullptr for an empty <dc:title/>; never hand that to std::string.
  const char* titleText = titleEl->GetText();
  this->title = titleText ? titleText : "";

  // Look for <meta name="cover" content="...">. Meta elements without a name
  // attribute are skipped rather than ending the search.
  const char* coverItem = nullptr;
  auto meta = metadata->FirstChildElement("meta");
  if (!meta) meta = metadata->FirstChildElement("opf:meta");
  while (meta) {
    const char* metaName = meta->Attribute("name");
    if (metaName && strcmp(metaName, "cover") == 0) {
      coverItem = meta->Attribute("content");
      break;
    }
    auto nextMeta = meta->NextSiblingElement("meta");
    if (!nextMeta) nextMeta = meta->NextSiblingElement("opf:meta");
    meta = nextMeta;
  }
  if (!coverItem) {
    // A missing cover is not fatal; the book can still be read.
    Serial.printf("[%lu] [EBP] Missing cover\n", millis());
  }

  // read the manifest and spine
  // the manifest gives us the names of the files
  // the spine gives us the order of the files
  // we can then read the files in the order they are in the spine
  auto manifest = package->FirstChildElement("manifest");
  if (!manifest) manifest = package->FirstChildElement("opf:manifest");
  if (!manifest) {
    Serial.printf("[%lu] [EBP] Missing manifest\n", millis());
    return false;
  }

  // create a mapping from id to file name
  auto item = manifest->FirstChildElement("item");
  if (!item) item = manifest->FirstChildElement("opf:item");
  std::map<std::string, std::string> items;

  while (item) {
    const char* idAttr = item->Attribute("id");
    const char* hrefAttr = item->Attribute("href");

    // Items lacking either attribute cannot be referenced or loaded; skip them.
    if (idAttr && hrefAttr) {
      const std::string itemId = idAttr;
      const std::string href = contentBasePath + hrefAttr;

      // grab the cover image
      if (coverItem && itemId == coverItem) {
        coverImageItem = href;
      }

      // grab the ncx file
      if (itemId == "ncx" || itemId == "ncxtoc") {
        tocNcxItem = href;
      }

      items[itemId] = href;
    }

    auto nextItem = item->NextSiblingElement("item");
    if (!nextItem) nextItem = item->NextSiblingElement("opf:item");
    item = nextItem;
  }

  // find the spine
  auto spineEl = package->FirstChildElement("spine");
  if (!spineEl) spineEl = package->FirstChildElement("opf:spine");
  if (!spineEl) {
    Serial.printf("[%lu] [EBP] Missing spine\n", millis());
    return false;
  }

  // read the spine
  auto itemref = spineEl->FirstChildElement("itemref");
  if (!itemref) itemref = spineEl->FirstChildElement("opf:itemref");
  while (itemref) {
    const char* idref = itemref->Attribute("idref");
    if (idref) {
      // Single lookup; entries that do not resolve to a manifest item are ignored.
      const auto found = items.find(idref);
      if (found != items.end()) {
        spine.emplace_back(found->first, found->second);
      }
    }
    auto nextItemRef = itemref->NextSiblingElement("itemref");
    if (!nextItemRef) nextItemRef = itemref->NextSiblingElement("opf:itemref");
    itemref = nextItemRef;
  }
  return true;
}

bool Epub::parseTocNcxFile(const ZipFile& zip) {
  // the ncx file should have been specified in the content.opf file
  if (tocNcxItem.empty()) {
    Serial.printf("[%lu] [EBP] No ncx file specified\n", millis());
    return false;
  }

  const auto ncxData = reinterpret_cast<char*>(zip.readFileToMemory(tocNcxItem.c_str(), nullptr, true));
  if (!ncxData) {
    Serial.printf("[%lu] [EBP] Could not find %s\n", millis(), tocNcxItem.c_str());
    return false;
  }

  // Parse the Toc contents
  tinyxml2::XMLDocument doc;
  const auto result = doc.Parse(ncxData);
  free(ncxData);

  if (result != tinyxml2::XML_SUCCESS) {
    Serial.printf("[%lu] [EBP] Error parsing toc %s\n", millis(), tinyxml2::XMLDocument::ErrorIDToName(result));
    return false;
  }

  const auto ncx = doc.FirstChildElement("ncx");
  if (!ncx) {
    Serial.printf("[%lu] [EBP] Could not find first child ncx in toc\n", millis());
    return false;
  }

  const auto navMap = ncx->FirstChildElement("navMap");
  if (!navMap) {
    Serial.printf("[%lu] [EBP] Could not find navMap child in ncx\n", millis());
    return false;
  }

  recursivelyParseNavMap(navMap->FirstChildElement("navPoint"));
  return true;
}

/**
 * Appends a `toc` entry for `element` and each of its following <navPoint>
 * siblings, descending into nested <navPoint> children right after their parent
 * so entries end up in document order.
 *
 * Each entry's href is `contentBasePath` + the <content src="..."> value, split
 * at the first '#' into file path and anchor. A navPoint without a usable
 * <content src> is logged and skipped (its children are still visited); a
 * navPoint without label text gets an empty title.
 *
 * @param element  first navPoint to process; may be nullptr (nothing is done)
 */
void Epub::recursivelyParseNavMap(tinyxml2::XMLElement* element) {
  // Fills toc map
  for (; element; element = element->NextSiblingElement("navPoint")) {
    const auto content = element->FirstChildElement("content");
    const char* src = content ? content->Attribute("src") : nullptr;

    if (src) {
      // Resolve navLabel/text/<first child> step by step; any link may be absent.
      std::string navTitle;
      const auto navLabel = element->FirstChildElement("navLabel");
      const auto textEl = navLabel ? navLabel->FirstChildElement("text") : nullptr;
      const auto textNode = textEl ? textEl->FirstChild() : nullptr;
      const char* label = textNode ? textNode->Value() : nullptr;
      if (label) {
        navTitle = label;
      }

      std::string href = contentBasePath + src;
      // split the href on the # to get the href and the anchor
      const size_t pos = href.find('#');
      std::string anchor;

      if (pos != std::string::npos) {
        anchor = href.substr(pos + 1);
        href = href.substr(0, pos);
      }

      toc.emplace_back(navTitle, href, anchor, 0);
    } else {
      Serial.printf("[%lu] [EBP] Skipping navPoint without content src\n", millis());
    }

    // A null first child simply makes the nested call a no-op.
    recursivelyParseNavMap(element->FirstChildElement("navPoint"));
  }
}

// load in the meta data for the epub file
bool Epub::load() {
  ZipFile zip("/sd" + filepath);

  std::string contentOpfFile;
  if (!findContentOpfFile(zip, contentOpfFile)) {
    Serial.printf("[%lu] [EBP] Could not open ePub\n", millis());
    return false;
  }

  contentBasePath = contentOpfFile.substr(0, contentOpfFile.find_last_of('/') + 1);

  if (!parseContentOpf(zip, contentOpfFile)) {
    return false;
  }

  if (!parseTocNcxFile(zip)) {
    return false;
  }

  return true;
}

/**
 * Removes the cache directory at `cachePath`, if there is one.
 *
 * @return true when the directory was absent or was removed; false when
 *         FsHelpers::removeDir reported failure
 */
bool Epub::clearCache() const {
  const char* const dir = cachePath.c_str();

  if (SD.exists(dir)) {
    if (FsHelpers::removeDir(dir)) {
      Serial.printf("[%lu] [EPB] Cache cleared successfully\n", millis());
      return true;
    }

    Serial.printf("[%lu] [EPB] Failed to clear cache\n", millis());
    return false;
  }

  Serial.printf("[%lu] [EPB] Cache does not exist, no action needed\n", millis());
  return true;
}

/**
 * Makes sure the directory at `cachePath` exists, creating each intermediate
 * directory along the way. Does nothing if the path already exists.
 */
void Epub::setupCacheDir() const {
  if (SD.exists(cachePath.c_str())) {
    return;
  }

  // Create every parent prefix in turn. The search starts at index 1 so a
  // leading '/' never produces an empty prefix.
  for (size_t slash = cachePath.find('/', 1); slash != std::string::npos;
       slash = cachePath.find('/', slash + 1)) {
    SD.mkdir(cachePath.substr(0, slash).c_str());
  }

  // Finally the cache directory itself.
  SD.mkdir(cachePath.c_str());
}

const std::string& Epub::getCachePath() const { return cachePath; }

const std::string& Epub::getPath() const { return filepath; }

const std::string& Epub::getTitle() const { return title; }

const std::string& Epub::getCoverImageItem() const { return coverImageItem; }

/**
 * Collapses a '/'-separated path into its canonical relative form.
 *
 * - empty segments (leading, trailing or repeated '/') are dropped
 * - "." segments are dropped
 * - ".." removes the preceding segment; at the root it is ignored
 *
 * The same rules apply to the final segment, so "a/b/.." yields "a".
 *
 * @param path  path to normalise
 * @return the remaining segments joined with single '/' characters, with no
 *         leading or trailing separator
 */
std::string normalisePath(const std::string& path) {
  std::vector<std::string> components;
  std::string component;

  // Applies one completed segment to the component stack. Shared by the loop
  // and the tail so the last segment is not treated differently.
  const auto commit = [&components](const std::string& segment) {
    if (segment.empty() || segment == ".") {
      return;
    }
    if (segment == "..") {
      if (!components.empty()) {
        components.pop_back();
      }
      return;
    }
    components.push_back(segment);
  };

  for (const char c : path) {
    if (c == '/') {
      commit(component);
      component.clear();
    } else {
      component += c;
    }
  }
  commit(component);

  std::string result;
  for (const auto& part : components) {
    if (!result.empty()) {
      result += '/';
    }
    result += part;
  }

  return result;
}

uint8_t* Epub::readItemContentsToBytes(const std::string& itemHref, size_t* size, bool trailingNullByte) const {
  const ZipFile zip("/sd" + filepath);
  const std::string path = normalisePath(itemHref);

  const auto content = zip.readFileToMemory(path.c_str(), size, trailingNullByte);
  if (!content) {
    Serial.printf("[%lu] [EBP] Failed to read item %s\n", millis(), path.c_str());
    return nullptr;
  }

  return content;
}

bool Epub::readItemContentsToStream(const std::string& itemHref, Print& out, const size_t chunkSize) const {
  const ZipFile zip("/sd" + filepath);
  const std::string path = normalisePath(itemHref);

  return zip.readFileToStream(path.c_str(), out, chunkSize);
}

// Number of entries collected in the spine.
int Epub::getSpineItemsCount() const {
  return static_cast<int>(spine.size());
}

/**
 * Returns the href of the spine entry at `spineIndex`.
 *
 * An out-of-range index is logged and answered with the first spine entry.
 * If the spine is empty there is no first entry, so a reference to a shared,
 * freshly cleared empty string is returned instead of failing inside at(0).
 *
 * @param spineIndex  zero-based spine position
 * @return reference to the matching href (or the fallback described above)
 */
std::string& Epub::getSpineItem(const int spineIndex) {
  // Check the sign first so the unsigned comparison below is well defined.
  if (spineIndex >= 0 && static_cast<size_t>(spineIndex) < spine.size()) {
    return spine[static_cast<size_t>(spineIndex)].second;
  }

  Serial.printf("[%lu] [EBP] getSpineItem index:%d is out of range\n", millis(), spineIndex);

  if (spine.empty()) {
    // Reset on every use in case a previous caller wrote through the reference.
    static std::string emptyItem;
    emptyItem.clear();
    return emptyItem;
  }

  return spine.front().second;
}

/**
 * Returns the table-of-contents entry at `tocTndex`.
 *
 * An out-of-range index is logged and answered with the first entry. If the
 * table of contents is empty there is no first entry, so a reference to a
 * shared placeholder entry (empty title/href/anchor, level 0) is returned
 * instead of failing inside at(0).
 *
 * @param tocTndex  zero-based position in the table of contents
 * @return reference to the matching entry (or the fallback described above)
 */
EpubTocEntry& Epub::getTocItem(const int tocTndex) {
  // Check the sign first so the unsigned comparison below is well defined.
  if (tocTndex >= 0 && static_cast<size_t>(tocTndex) < toc.size()) {
    return toc[static_cast<size_t>(tocTndex)];
  }

  Serial.printf("[%lu] [EBP] getTocItem index:%d is out of range\n", millis(), tocTndex);

  if (toc.empty()) {
    // Built with the same (title, href, anchor, level) argument list that is
    // used when real entries are appended to toc.
    static EpubTocEntry placeholder{std::string(), std::string(), std::string(), 0};
    return placeholder;
  }

  return toc.front();
}

// Number of entries collected in the table of contents.
int Epub::getTocItemsCount() const {
  return static_cast<int>(toc.size());
}

/**
 * Works out the spine (section) index for a table-of-contents index.
 *
 * The toc entry's href is compared with each spine href; the first match wins.
 *
 * @param tocIndex  zero-based position in the table of contents
 * @return index of the matching spine entry, or 0 (start of the book) when
 *         `tocIndex` is out of range or no spine entry matches; both cases are
 *         logged
 */
int Epub::getSpineIndexForTocIndex(const int tocIndex) const {
  // Never index toc with an unchecked value.
  if (tocIndex < 0 || static_cast<size_t>(tocIndex) >= toc.size()) {
    Serial.printf("[%lu] [EBP] getSpineIndexForTocIndex index:%d is out of range\n", millis(), tocIndex);
    return 0;
  }

  // the toc entry should have an href that matches the spine item
  // so we can find the spine index by looking for the href
  const auto& wantedHref = toc[static_cast<size_t>(tocIndex)].href;
  for (size_t i = 0; i < spine.size(); i++) {
    if (spine[i].second == wantedHref) {
      return static_cast<int>(i);
    }
  }

  Serial.printf("[%lu] [EBP] Section not found\n", millis());
  // not found - default to the start of the book
  return 0;
}

/**
 * Works out the table-of-contents index for a spine (section) index.
 *
 * The spine entry's href is compared with each toc href; the first match wins.
 *
 * @param spineIndex  zero-based spine position
 * @return index of the matching toc entry, or 0 (first item) when `spineIndex`
 *         is out of range or no toc entry matches; both cases are logged
 */
int Epub::getTocIndexForSpineIndex(const int spineIndex) const {
  // Never index spine with an unchecked value.
  if (spineIndex < 0 || static_cast<size_t>(spineIndex) >= spine.size()) {
    Serial.printf("[%lu] [EBP] getTocIndexForSpineIndex index:%d is out of range\n", millis(), spineIndex);
    return 0;
  }

  // the toc entry should have an href that matches the spine item
  // so we can find the toc index by looking for the href
  const auto& wantedHref = spine[static_cast<size_t>(spineIndex)].second;
  for (size_t i = 0; i < toc.size(); i++) {
    if (toc[i].href == wantedHref) {
      return static_cast<int>(i);
    }
  }

  Serial.printf("[%lu] [EBP] TOC item not found\n", millis());
  // not found - default to first item
  return 0;
}