diff --git a/lib/Epub/Epub/Page.cpp b/lib/Epub/Epub/Page.cpp index 92839eb7..dcb00392 100644 --- a/lib/Epub/Epub/Page.cpp +++ b/lib/Epub/Epub/Page.cpp @@ -1,8 +1,15 @@ #include "Page.h" #include +#include #include +#include + +namespace { +constexpr uint16_t MAX_PATH_LEN = 512; +} // namespace + void PageLine::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) { block->render(renderer, fontId, xPos + xOffset, yPos + yOffset); } @@ -25,6 +32,59 @@ std::unique_ptr PageLine::deserialize(FsFile& file) { return std::unique_ptr(new PageLine(std::move(tb), xPos, yPos)); } +void PageImage::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) { + (void)fontId; + FsFile file; + if (!SdMan.openFileForRead("PGE", bmpPath, file)) { + Serial.printf("[%lu] [PGE] Failed to open image %s\n", millis(), bmpPath.c_str()); + return; + } + + Bitmap bitmap(file, true); + const auto err = bitmap.parseHeaders(); + if (err != BmpReaderError::Ok) { + Serial.printf("[%lu] [PGE] Failed to parse bitmap %s: %s\n", millis(), bmpPath.c_str(), + Bitmap::errorToString(err)); + file.close(); + return; + } + + renderer.drawBitmap(bitmap, xPos + xOffset, yPos + yOffset, 0, 0); + file.close(); +} + +bool PageImage::serialize(FsFile& file) { + serialization::writePod(file, xPos); + serialization::writePod(file, yPos); + + const uint16_t pathLen = static_cast(std::min(bmpPath.size(), static_cast(MAX_PATH_LEN))); + serialization::writePod(file, pathLen); + return file.write(bmpPath.data(), pathLen) == pathLen; +} + +std::unique_ptr PageImage::deserialize(FsFile& file) { + int16_t xPos; + int16_t yPos; + uint16_t pathLen; + serialization::readPod(file, xPos); + serialization::readPod(file, yPos); + serialization::readPod(file, pathLen); + + if (pathLen == 0 || pathLen > MAX_PATH_LEN) { + Serial.printf("[%lu] [PGE] Invalid image path length: %u\n", millis(), pathLen); + return nullptr; + } + + std::string path; + path.resize(pathLen); + if (file.read(&path[0], pathLen) != pathLen) { + Serial.printf("[%lu] [PGE] Failed to read image path\n", millis()); + return nullptr; + } + + return std::unique_ptr(new PageImage(std::move(path), xPos, yPos)); +} + void Page::render(GfxRenderer& renderer, const int fontId, const int xOffset, const int yOffset) const { for (auto& element : elements) { element->render(renderer, fontId, xOffset, yOffset); @@ -36,8 +96,7 @@ bool Page::serialize(FsFile& file) const { serialization::writePod(file, count); for (const auto& el : elements) { - // Only PageLine exists currently - serialization::writePod(file, static_cast(TAG_PageLine)); + serialization::writePod(file, static_cast(el->tag())); if (!el->serialize(file)) { return false; } @@ -58,7 +117,16 @@ std::unique_ptr Page::deserialize(FsFile& file) { if (tag == TAG_PageLine) { auto pl = PageLine::deserialize(file); + if (!pl) { + return nullptr; + } page->elements.push_back(std::move(pl)); + } else if (tag == TAG_PageImage) { + auto pi = PageImage::deserialize(file); + if (!pi) { + return nullptr; + } + page->elements.push_back(std::move(pi)); } else { Serial.printf("[%lu] [PGE] Deserialization failed: Unknown tag %u\n", millis(), tag); return nullptr; diff --git a/lib/Epub/Epub/Page.h b/lib/Epub/Epub/Page.h index 20061941..975cc44c 100644 --- a/lib/Epub/Epub/Page.h +++ b/lib/Epub/Epub/Page.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -8,6 +9,7 @@ enum PageElementTag : uint8_t { TAG_PageLine = 1, + TAG_PageImage = 2, }; // represents something that has been added to a page @@ -19,6 +21,7 @@ class PageElement { virtual ~PageElement() = default; virtual void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) = 0; virtual bool serialize(FsFile& file) = 0; + virtual PageElementTag tag() const = 0; }; // a line from a block element @@ -30,9 +33,22 @@ class PageLine final : public PageElement { : PageElement(xPos, yPos), block(std::move(block)) {} void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override; bool serialize(FsFile& file) override; + PageElementTag tag() const override { return TAG_PageLine; } static std::unique_ptr deserialize(FsFile& file); }; +class PageImage final : public PageElement { + std::string bmpPath; + + public: + PageImage(std::string bmpPath, const int16_t xPos, const int16_t yPos) + : PageElement(xPos, yPos), bmpPath(std::move(bmpPath)) {} + void render(GfxRenderer& renderer, int fontId, int xOffset, int yOffset) override; + bool serialize(FsFile& file) override; + PageElementTag tag() const override { return TAG_PageImage; } + static std::unique_ptr deserialize(FsFile& file); +}; + class Page { public: // the list of block index and line numbers on this page diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index cf67108b..40e3558a 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -8,7 +8,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 10; +constexpr uint8_t SECTION_FILE_VERSION = 11; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(uint32_t); @@ -177,8 +177,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c std::vector lut = {}; ChapterHtmlSlimParser visitor( - tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, - viewportHeight, hyphenationEnabled, + epub, tmpHtmlPath, localPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, + viewportWidth, viewportHeight, hyphenationEnabled, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, popupFn); Hyphenator::setPreferredLanguage(epub->getLanguage()); success = visitor.parseAndBuildPages(); diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index ac1f537f..712f845a 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -1,8 +1,13 @@ #include "ChapterHtmlSlimParser.h" +#include "../../Epub.h" + +#include #include #include +#include #include +#include #include #include "../Page.h" @@ -30,6 +35,27 @@ constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\t'; } +bool isJpegPath(const std::string& path) { + if (path.size() < 4) { + return false; + } + std::string lower = path; + for (auto& c : lower) { + if (c >= 'A' && c <= 'Z') { + c = static_cast(c + ('a' - 'A')); + } + } + return lower.size() >= 4 && (lower.rfind(".jpg") == lower.size() - 4 || lower.rfind(".jpeg") == lower.size() - 5); +} + +std::string getBaseDir(const std::string& path) { + const auto lastSlash = path.find_last_of('/'); + if (lastSlash == std::string::npos) { + return ""; + } + return path.substr(0, lastSlash + 1); +} + // given the start and end of a tag, check to see if it matches a known tag bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { for (int i = 0; i < possible_tag_count; i++) { @@ -71,6 +97,98 @@ void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); } +void ChapterHtmlSlimParser::addImageToPage(const std::string& bmpPath, const int bmpWidth, const int bmpHeight) { + if (!currentPage) { + currentPage.reset(new Page()); + currentPageNextY = 0; + } + + if (currentPageNextY + bmpHeight > viewportHeight && currentPageNextY > 0) { + completePageFn(std::move(currentPage)); + currentPage.reset(new Page()); + currentPageNextY = 0; + } + + const int16_t xPos = static_cast(std::max(0, (static_cast(viewportWidth) - bmpWidth) / 2)); + currentPage->elements.push_back(std::make_shared(bmpPath, xPos, currentPageNextY)); + currentPageNextY += bmpHeight; + + if (extraParagraphSpacing) { + const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; + currentPageNextY += lineHeight / 2; + } +} + +std::string ChapterHtmlSlimParser::resolveImageHref(const std::string& src) const { + if (src.empty()) { + return ""; + } + if (src.rfind("http://", 0) == 0 || src.rfind("https://", 0) == 0 || src.rfind("data:", 0) == 0) { + return ""; + } + + if (!src.empty() && src[0] == '/') { + return FsHelpers::normalisePath(src.substr(1)); + } + + return FsHelpers::normalisePath(getBaseDir(chapterHref) + src); +} + +bool ChapterHtmlSlimParser::generateImageBmp(const std::string& imageHref, std::string* outBmpPath) const { + if (!epub || imageHref.empty() || !outBmpPath) { + return false; + } + + const auto cacheDir = epub->getCachePath() + "/images"; + SdMan.mkdir(cacheDir.c_str()); + + const uint64_t hrefHash = ZipFile::fnvHash64(imageHref.c_str(), imageHref.size()); + const auto bmpPath = cacheDir + "/img_" + std::to_string(hrefHash) + ".bmp"; + *outBmpPath = bmpPath; + + if (SdMan.exists(bmpPath.c_str())) { + return true; + } + + const auto tmpJpgPath = cacheDir + "/.tmp_" + std::to_string(hrefHash) + ".jpg"; + FsFile tmpJpg; + if (!SdMan.openFileForWrite("EHP", tmpJpgPath, tmpJpg)) { + return false; + } + const bool streamOk = epub->readItemContentsToStream(imageHref, tmpJpg, 1024); + tmpJpg.close(); + if (!streamOk) { + SdMan.remove(tmpJpgPath.c_str()); + return false; + } + + FsFile jpgFile; + if (!SdMan.openFileForRead("EHP", tmpJpgPath, jpgFile)) { + SdMan.remove(tmpJpgPath.c_str()); + return false; + } + + FsFile bmpFile; + if (!SdMan.openFileForWrite("EHP", bmpPath, bmpFile)) { + jpgFile.close(); + SdMan.remove(tmpJpgPath.c_str()); + return false; + } + + const bool success = + JpegToBmpConverter::jpegFileToBmpStreamWithSize(jpgFile, bmpFile, viewportWidth, viewportHeight); + jpgFile.close(); + bmpFile.close(); + SdMan.remove(tmpJpgPath.c_str()); + + if (!success) { + SdMan.remove(bmpPath.c_str()); + return false; + } + + return true; +} + void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); @@ -96,29 +214,69 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } if (matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS)) { - // TODO: Start processing image tags + std::string src; std::string alt = "[Image]"; if (atts != nullptr) { for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "src") == 0 && atts[i + 1]) { + src = atts[i + 1]; + } if (strcmp(atts[i], "alt") == 0) { if (strlen(atts[i + 1]) > 0) { alt = "[Image: " + std::string(atts[i + 1]) + "]"; } - break; } } } - Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); + const std::string imageHref = self->resolveImageHref(src); + bool renderedImage = false; + if (!imageHref.empty() && isJpegPath(imageHref)) { + if (self->partWordBufferIndex > 0) { + self->flushPartWordBuffer(); + } + if (self->currentTextBlock && !self->currentTextBlock->isEmpty()) { + self->makePages(); + } + + std::string bmpPath; + if (self->generateImageBmp(imageHref, &bmpPath)) { + FsFile bmpFile; + if (SdMan.openFileForRead("EHP", bmpPath, bmpFile)) { + Bitmap bitmap(bmpFile, true); + const auto err = bitmap.parseHeaders(); + if (err == BmpReaderError::Ok) { + self->addImageToPage(bmpPath, bitmap.getWidth(), bitmap.getHeight()); + renderedImage = true; + } else { + Serial.printf("[%lu] [EHP] Failed to parse bitmap %s: %s\n", millis(), bmpPath.c_str(), + Bitmap::errorToString(err)); + } + bmpFile.close(); + } + } else { + Serial.printf("[%lu] [EHP] Failed to generate image bmp for %s\n", millis(), imageHref.c_str()); + } + + if (renderedImage) { + self->startNewTextBlock(static_cast(self->paragraphAlignment)); + } + } + + if (!renderedImage) { + Serial.printf("[%lu] [EHP] Image unsupported, alt: %s\n", millis(), alt.c_str()); + self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->italicUntilDepth = min(self->italicUntilDepth, self->depth); + // Advance depth before processing character data (like you would for a element with text) + self->depth += 1; + self->characterData(userData, alt.c_str(), alt.length()); + + // Skip table contents (skip until parent as we pre-advanced depth above) + self->skipUntilDepth = self->depth - 1; + return; + } - self->startNewTextBlock(TextBlock::CENTER_ALIGN); - self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) self->depth += 1; - self->characterData(userData, alt.c_str(), alt.length()); - - // Skip table contents (skip until parent as we pre-advanced depth above) - self->skipUntilDepth = self->depth - 1; return; } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 38202e6e..6c4bcd19 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -9,13 +9,16 @@ #include "../ParsedText.h" #include "../blocks/TextBlock.h" +class Epub; class Page; class GfxRenderer; #define MAX_WORD_SIZE 200 class ChapterHtmlSlimParser { + const std::shared_ptr epub; const std::string& filepath; + const std::string chapterHref; GfxRenderer& renderer; std::function)> completePageFn; std::function popupFn; // Popup callback @@ -41,19 +44,25 @@ class ChapterHtmlSlimParser { void startNewTextBlock(TextBlock::Style style); void flushPartWordBuffer(); void makePages(); + void addImageToPage(const std::string& bmpPath, int bmpWidth, int bmpHeight); + std::string resolveImageHref(const std::string& src) const; + bool generateImageBmp(const std::string& imageHref, std::string* outBmpPath) const; // XML callbacks static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); static void XMLCALL characterData(void* userData, const XML_Char* s, int len); static void XMLCALL endElement(void* userData, const XML_Char* name); public: - explicit ChapterHtmlSlimParser(const std::string& filepath, GfxRenderer& renderer, const int fontId, + explicit ChapterHtmlSlimParser(const std::shared_ptr& epub, const std::string& filepath, + const std::string& chapterHref, GfxRenderer& renderer, const int fontId, const float lineCompression, const bool extraParagraphSpacing, const uint8_t paragraphAlignment, const uint16_t viewportWidth, const uint16_t viewportHeight, const bool hyphenationEnabled, const std::function)>& completePageFn, const std::function& popupFn = nullptr) - : filepath(filepath), + : epub(epub), + filepath(filepath), + chapterHref(chapterHref), renderer(renderer), fontId(fontId), lineCompression(lineCompression),