diff --git a/README.md b/README.md index f015f718..d59df835 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ This project is **not affiliated with Xteink**; it's built as a community projec ## Features & Usage -- [x] EPUB parsing and rendering +- [x] EPUB parsing and rendering (EPUB 2 and EPUB 3) - [ ] Image support within EPUB - [x] Saved reading position - [x] File explorer with file picker diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index fde2e16a..234344d7 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -8,6 +8,7 @@ #include "Epub/parsers/ContainerParser.h" #include "Epub/parsers/ContentOpfParser.h" +#include "Epub/parsers/TocNavParser.h" #include "Epub/parsers/TocNcxParser.h" bool Epub::findContentOpfFile(std::string* contentOpfFile) const { @@ -80,6 +81,10 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { tocNcxItem = opfParser.tocNcxPath; } + if (!opfParser.tocNavPath.empty()) { + tocNavItem = opfParser.tocNavPath; + } + Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; } @@ -141,6 +146,60 @@ bool Epub::parseTocNcxFile() const { return true; } +bool Epub::parseTocNavFile() const { + // the nav file should have been specified in the content.opf file (EPUB 3) + if (tocNavItem.empty()) { + Serial.printf("[%lu] [EBP] No nav file specified\n", millis()); + return false; + } + + Serial.printf("[%lu] [EBP] Parsing toc nav file: %s\n", millis(), tocNavItem.c_str()); + + const auto tmpNavPath = getCachePath() + "/toc.nav"; + FsFile tempNavFile; + if (!SdMan.openFileForWrite("EBP", tmpNavPath, tempNavFile)) { + return false; + } + readItemContentsToStream(tocNavItem, tempNavFile, 1024); + tempNavFile.close(); + if (!SdMan.openFileForRead("EBP", tmpNavPath, tempNavFile)) { + return false; + } + const auto navSize = tempNavFile.size(); + + TocNavParser navParser(contentBasePath, navSize, bookMetadataCache.get()); + + if (!navParser.setup()) { + Serial.printf("[%lu] [EBP] Could not setup toc nav parser\n", millis()); + return false; + } + + const auto navBuffer = static_cast(malloc(1024)); + if (!navBuffer) { + Serial.printf("[%lu] [EBP] Could not allocate memory for toc nav parser\n", millis()); + return false; + } + + while (tempNavFile.available()) { + const auto readSize = tempNavFile.read(navBuffer, 1024); + const auto processedSize = navParser.write(navBuffer, readSize); + + if (processedSize != readSize) { + Serial.printf("[%lu] [EBP] Could not process all toc nav data\n", millis()); + free(navBuffer); + tempNavFile.close(); + return false; + } + } + + free(navBuffer); + tempNavFile.close(); + SdMan.remove(tmpNavPath.c_str()); + + Serial.printf("[%lu] [EBP] Parsed TOC nav items\n", millis()); + return true; +} + // load in the meta data for the epub file bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); @@ -184,15 +243,31 @@ bool Epub::load(const bool buildIfMissing) { return false; } - // TOC Pass + // TOC Pass - try EPUB 3 nav first, fall back to NCX if (!bookMetadataCache->beginTocPass()) { Serial.printf("[%lu] [EBP] Could not begin writing toc pass\n", millis()); return false; } - if (!parseTocNcxFile()) { - Serial.printf("[%lu] [EBP] Could not parse toc\n", millis()); - return false; + + bool tocParsed = false; + + // Try EPUB 3 nav document first (preferred) + if (!tocNavItem.empty()) { + Serial.printf("[%lu] [EBP] Attempting to parse EPUB 3 nav document\n", millis()); + tocParsed = parseTocNavFile(); } + + // Fall back to NCX if nav parsing failed or wasn't available + if (!tocParsed && !tocNcxItem.empty()) { + Serial.printf("[%lu] [EBP] Falling back to NCX TOC\n", millis()); + tocParsed = parseTocNcxFile(); + } + + if (!tocParsed) { + Serial.printf("[%lu] [EBP] Warning: Could not parse any TOC format\n", millis()); + // Continue anyway - book will work without TOC + } + if (!bookMetadataCache->endTocPass()) { Serial.printf("[%lu] [EBP] Could not end writing toc pass\n", millis()); return false; diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 1b82462d..a6555e7e 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -12,8 +12,10 @@ class ZipFile; class Epub { - // the ncx file + // the ncx file (EPUB 2) std::string tocNcxItem; + // the nav file (EPUB 3) + std::string tocNavItem; // where is the EPUBfile? std::string filepath; // the base path for items in the EPUB file @@ -26,6 +28,7 @@ class Epub { bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; + bool parseTocNavFile() const; public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 1f99f018..18b81aae 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -7,9 +7,9 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 8; -constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint16_t) + - sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t); +constexpr uint8_t SECTION_FILE_VERSION = 9; +constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t); } // namespace uint32_t Section::onPageComplete(std::unique_ptr page) { @@ -30,19 +30,21 @@ uint32_t Section::onPageComplete(std::unique_ptr page) { } void Section::writeSectionFileHeader(const int fontId, const float lineCompression, const bool extraParagraphSpacing, - const uint16_t viewportWidth, const uint16_t viewportHeight) { + const uint8_t paragraphAlignment, const uint16_t viewportWidth, + const uint16_t viewportHeight) { if (!file) { Serial.printf("[%lu] [SCT] File not open for writing header\n", millis()); return; } static_assert(HEADER_SIZE == sizeof(SECTION_FILE_VERSION) + sizeof(fontId) + sizeof(lineCompression) + - sizeof(extraParagraphSpacing) + sizeof(viewportWidth) + sizeof(viewportHeight) + - sizeof(pageCount) + sizeof(uint32_t), + sizeof(extraParagraphSpacing) + sizeof(paragraphAlignment) + sizeof(viewportWidth) + + sizeof(viewportHeight) + sizeof(pageCount) + sizeof(uint32_t), "Header size mismatch"); serialization::writePod(file, SECTION_FILE_VERSION); serialization::writePod(file, fontId); serialization::writePod(file, lineCompression); serialization::writePod(file, extraParagraphSpacing); + serialization::writePod(file, paragraphAlignment); serialization::writePod(file, viewportWidth); serialization::writePod(file, viewportHeight); serialization::writePod(file, pageCount); // Placeholder for page count (will be initially 0 when written) @@ -50,7 +52,8 @@ void Section::writeSectionFileHeader(const int fontId, const float lineCompressi } bool Section::loadSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing, - const uint16_t viewportWidth, const uint16_t viewportHeight) { + const uint8_t paragraphAlignment, const uint16_t viewportWidth, + const uint16_t viewportHeight) { if (!SdMan.openFileForRead("SCT", filePath, file)) { return false; } @@ -70,15 +73,17 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con uint16_t fileViewportWidth, fileViewportHeight; float fileLineCompression; bool fileExtraParagraphSpacing; + uint8_t fileParagraphAlignment; serialization::readPod(file, fileFontId); serialization::readPod(file, fileLineCompression); serialization::readPod(file, fileExtraParagraphSpacing); + serialization::readPod(file, fileParagraphAlignment); serialization::readPod(file, fileViewportWidth); serialization::readPod(file, fileViewportHeight); if (fontId != fileFontId || lineCompression != fileLineCompression || - extraParagraphSpacing != fileExtraParagraphSpacing || viewportWidth != fileViewportWidth || - viewportHeight != fileViewportHeight) { + extraParagraphSpacing != fileExtraParagraphSpacing || paragraphAlignment != fileParagraphAlignment || + viewportWidth != fileViewportWidth || viewportHeight != fileViewportHeight) { file.close(); Serial.printf("[%lu] [SCT] Deserialization failed: Parameters do not match\n", millis()); clearCache(); @@ -109,8 +114,8 @@ bool Section::clearCache() const { } bool Section::createSectionFile(const int fontId, const float lineCompression, const bool extraParagraphSpacing, - const uint16_t viewportWidth, const uint16_t viewportHeight, - const std::function& progressSetupFn, + const uint8_t paragraphAlignment, const uint16_t viewportWidth, + const uint16_t viewportHeight, const std::function& progressSetupFn, const std::function& progressFn) { constexpr uint32_t MIN_SIZE_FOR_PROGRESS = 50 * 1024; // 50KB const auto localPath = epub->getSpineItem(spineIndex).href; @@ -166,11 +171,13 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c if (!SdMan.openFileForWrite("SCT", filePath, file)) { return false; } - writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight); + writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, + viewportHeight); std::vector lut = {}; ChapterHtmlSlimParser visitor( - tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, viewportWidth, viewportHeight, + tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, + viewportHeight, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn); success = visitor.parseAndBuildPages(); diff --git a/lib/Epub/Epub/Section.h b/lib/Epub/Epub/Section.h index 55244d0e..bac95efd 100644 --- a/lib/Epub/Epub/Section.h +++ b/lib/Epub/Epub/Section.h @@ -14,8 +14,8 @@ class Section { std::string filePath; FsFile file; - void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, uint16_t viewportWidth, - uint16_t viewportHeight); + void writeSectionFileHeader(int fontId, float lineCompression, bool extraParagraphSpacing, uint8_t paragraphAlignment, + uint16_t viewportWidth, uint16_t viewportHeight); uint32_t onPageComplete(std::unique_ptr page); public: @@ -28,11 +28,12 @@ class Section { renderer(renderer), filePath(epub->getCachePath() + "/sections/" + std::to_string(spineIndex) + ".bin") {} ~Section() = default; - bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint16_t viewportWidth, - uint16_t viewportHeight); + bool loadSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint8_t paragraphAlignment, + uint16_t viewportWidth, uint16_t viewportHeight); bool clearCache() const; - bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint16_t viewportWidth, - uint16_t viewportHeight, const std::function& progressSetupFn = nullptr, + bool createSectionFile(int fontId, float lineCompression, bool extraParagraphSpacing, uint8_t paragraphAlignment, + uint16_t viewportWidth, uint16_t viewportHeight, + const std::function& progressSetupFn = nullptr, const std::function& progressFn = nullptr); std::unique_ptr loadPageFromSectionFile(); }; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 5cd53293..e5eb4d10 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -97,7 +97,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* if (strcmp(name, "br") == 0) { self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { - self->startNewTextBlock(TextBlock::JUSTIFIED); + self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); } } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); @@ -137,6 +137,21 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char continue; } + // Skip soft-hyphen with UTF-8 representation (U+00AD) = 0xC2 0xAD + const XML_Char SHY_BYTE_1 = static_cast(0xC2); + const XML_Char SHY_BYTE_2 = static_cast(0xAD); + // 1. Check for the start of the 2-byte Soft Hyphen sequence + if (s[i] == SHY_BYTE_1) { + // 2. Check if the next byte exists AND if it completes the sequence + // We must check i + 1 < len to prevent reading past the end of the buffer. + if ((i + 1 < len) && (s[i + 1] == SHY_BYTE_2)) { + // Sequence 0xC2 0xAD found! + // Skip the current byte (0xC2) and the next byte (0xAD) + i++; // Increment 'i' one more time to skip the 0xAD byte + continue; // Skip the rest of the loop and move to the next iteration + } + } + // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; @@ -206,7 +221,7 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n } bool ChapterHtmlSlimParser::parseAndBuildPages() { - startNewTextBlock(TextBlock::JUSTIFIED); + startNewTextBlock((TextBlock::Style)this->paragraphAlignment); const XML_Parser parser = XML_ParserCreate(nullptr); int done; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 795c2c33..c559e157 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -33,6 +33,7 @@ class ChapterHtmlSlimParser { int fontId; float lineCompression; bool extraParagraphSpacing; + uint8_t paragraphAlignment; uint16_t viewportWidth; uint16_t viewportHeight; @@ -46,7 +47,8 @@ class ChapterHtmlSlimParser { public: explicit ChapterHtmlSlimParser(const std::string& filepath, GfxRenderer& renderer, const int fontId, const float lineCompression, const bool extraParagraphSpacing, - const uint16_t viewportWidth, const uint16_t viewportHeight, + const uint8_t paragraphAlignment, const uint16_t viewportWidth, + const uint16_t viewportHeight, const std::function)>& completePageFn, const std::function& progressFn = nullptr) : filepath(filepath), @@ -54,6 +56,7 @@ class ChapterHtmlSlimParser { fontId(fontId), lineCompression(lineCompression), extraParagraphSpacing(extraParagraphSpacing), + paragraphAlignment(paragraphAlignment), viewportWidth(viewportWidth), viewportHeight(viewportHeight), completePageFn(completePageFn), diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index c9398778..2c90d01d 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -161,6 +161,7 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name std::string itemId; std::string href; std::string mediaType; + std::string properties; for (int i = 0; atts[i]; i += 2) { if (strcmp(atts[i], "id") == 0) { @@ -169,6 +170,8 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name href = self->baseContentPath + atts[i + 1]; } else if (strcmp(atts[i], "media-type") == 0) { mediaType = atts[i + 1]; + } else if (strcmp(atts[i], "properties") == 0) { + properties = atts[i + 1]; } } @@ -188,6 +191,15 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name href.c_str()); } } + + // EPUB 3: Check for nav document (properties contains "nav") + if (!properties.empty() && self->tocNavPath.empty()) { + // Properties is space-separated, check if "nav" is present as a word + if (properties == "nav" || properties.find("nav ") == 0 || properties.find(" nav") != std::string::npos) { + self->tocNavPath = href; + Serial.printf("[%lu] [COF] Found EPUB 3 nav document: %s\n", millis(), href.c_str()); + } + } return; } diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index 245fca3b..1940aaaf 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -35,6 +35,7 @@ class ContentOpfParser final : public Print { std::string title; std::string author; std::string tocNcxPath; + std::string tocNavPath; // EPUB 3 nav document path std::string coverItemHref; std::string textReferenceHref; diff --git a/lib/Epub/Epub/parsers/TocNavParser.cpp b/lib/Epub/Epub/parsers/TocNavParser.cpp new file mode 100644 index 00000000..b8a4e7fb --- /dev/null +++ b/lib/Epub/Epub/parsers/TocNavParser.cpp @@ -0,0 +1,184 @@ +#include "TocNavParser.h" + +#include + +#include "../BookMetadataCache.h" + +bool TocNavParser::setup() { + parser = XML_ParserCreate(nullptr); + if (!parser) { + Serial.printf("[%lu] [NAV] Couldn't allocate memory for parser\n", millis()); + return false; + } + + XML_SetUserData(parser, this); + XML_SetElementHandler(parser, startElement, endElement); + XML_SetCharacterDataHandler(parser, characterData); + return true; +} + +TocNavParser::~TocNavParser() { + if (parser) { + XML_StopParser(parser, XML_FALSE); + XML_SetElementHandler(parser, nullptr, nullptr); + XML_SetCharacterDataHandler(parser, nullptr); + XML_ParserFree(parser); + parser = nullptr; + } +} + +size_t TocNavParser::write(const uint8_t data) { return write(&data, 1); } + +size_t TocNavParser::write(const uint8_t* buffer, const size_t size) { + if (!parser) return 0; + + const uint8_t* currentBufferPos = buffer; + auto remainingInBuffer = size; + + while (remainingInBuffer > 0) { + void* const buf = XML_GetBuffer(parser, 1024); + if (!buf) { + Serial.printf("[%lu] [NAV] Couldn't allocate memory for buffer\n", millis()); + XML_StopParser(parser, XML_FALSE); + XML_SetElementHandler(parser, nullptr, nullptr); + XML_SetCharacterDataHandler(parser, nullptr); + XML_ParserFree(parser); + parser = nullptr; + return 0; + } + + const auto toRead = remainingInBuffer < 1024 ? remainingInBuffer : 1024; + memcpy(buf, currentBufferPos, toRead); + + if (XML_ParseBuffer(parser, static_cast(toRead), remainingSize == toRead) == XML_STATUS_ERROR) { + Serial.printf("[%lu] [NAV] Parse error at line %lu: %s\n", millis(), XML_GetCurrentLineNumber(parser), + XML_ErrorString(XML_GetErrorCode(parser))); + XML_StopParser(parser, XML_FALSE); + XML_SetElementHandler(parser, nullptr, nullptr); + XML_SetCharacterDataHandler(parser, nullptr); + XML_ParserFree(parser); + parser = nullptr; + return 0; + } + + currentBufferPos += toRead; + remainingInBuffer -= toRead; + remainingSize -= toRead; + } + return size; +} + +void XMLCALL TocNavParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { + auto* self = static_cast(userData); + + // Track HTML structure loosely - we mainly care about finding