From 2cf799f45b582ad046ea67ebca31c78e7d334444 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Thu, 5 Feb 2026 05:28:10 -0500 Subject: [PATCH] feat: Add CSS parsing and CSS support in EPUBs (#411) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary * **What is the goal of this PR?** - Adds basic CSS parsing to EPUBs and determines the CSS rules when rendering to the screen so that text is styled correctly. Currently supports bold, underline, italics, margin, padding, and text alignment. ## Additional Context - My main reason for wanting this is that the book I'm currently reading, Carl's Doomsday Scenario (2nd in the Dungeon Crawler Carl series), relies _a lot_ on styled text for telling parts of the story. When text is bolded, it's supposed to be a message that's rendered "on-screen" in the story. When characters are "chatting" with each other, the text is bolded and their names are underlined. Plus, normal emphasis is provided by italicizing words here and there. So, this greatly improves my experience reading this book on the Xteink, and I figured it was useful enough for others too. - For transparency: I'm a software engineer, but I'm mostly frontend and TypeScript/JavaScript. It's been _years_ since I did any C/C++, so I would not be surprised if I'm doing something dumb along the way in this code. Please don't hesitate to ask for changes if something looks off. I heavily relied on Claude Code for help, and I had a lot of inspiration from how [microreader](https://github.com/CidVonHighwind/microreader) achieves their CSS parsing and styling. 
I did give this as good of a code review as I could and went through everything, and _it works on my machine_ 😄 ### Before ![IMG_6271](https://github.com/user-attachments/assets/dba7554d-efb6-4d13-88bc-8b83cd1fc615) ![IMG_6272](https://github.com/user-attachments/assets/61ba2de0-87c9-4f39-956f-013da4fe20a4) ### After ![IMG_6268](https://github.com/user-attachments/assets/ebe11796-cca9-4a46-b9c7-0709c7932818) ![IMG_6269](https://github.com/user-attachments/assets/e89c33dc-ff47-4bb7-855e-863fe44b3202) --- ### AI Usage Did you use AI tools to help write this code? **YES**, Claude Code --- lib/EpdFont/EpdFontFamily.cpp | 24 +- lib/EpdFont/EpdFontFamily.h | 2 +- lib/Epub/Epub.cpp | 87 ++- lib/Epub/Epub.h | 11 +- lib/Epub/Epub/ParsedText.cpp | 78 +- lib/Epub/Epub/ParsedText.h | 15 +- lib/Epub/Epub/Section.cpp | 5 +- lib/Epub/Epub/blocks/BlockStyle.h | 90 +++ lib/Epub/Epub/blocks/TextBlock.cpp | 62 +- lib/Epub/Epub/blocks/TextBlock.h | 22 +- lib/Epub/Epub/css/CssParser.cpp | 697 ++++++++++++++++++ lib/Epub/Epub/css/CssParser.h | 114 +++ lib/Epub/Epub/css/CssStyle.h | 191 +++++ .../Epub/parsers/ChapterHtmlSlimParser.cpp | 296 ++++++-- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 27 +- lib/Epub/Epub/parsers/ContentOpfParser.cpp | 6 + lib/Epub/Epub/parsers/ContentOpfParser.h | 1 + lib/GfxRenderer/GfxRenderer.cpp | 14 + lib/GfxRenderer/GfxRenderer.h | 1 + src/activities/boot_sleep/SleepActivity.cpp | 3 +- src/activities/home/HomeActivity.cpp | 3 +- 21 files changed, 1623 insertions(+), 126 deletions(-) create mode 100644 lib/Epub/Epub/blocks/BlockStyle.h create mode 100644 lib/Epub/Epub/css/CssParser.cpp create mode 100644 lib/Epub/Epub/css/CssParser.h create mode 100644 lib/Epub/Epub/css/CssStyle.h diff --git a/lib/EpdFont/EpdFontFamily.cpp b/lib/EpdFont/EpdFontFamily.cpp index 74a6677f..821153e3 100644 --- a/lib/EpdFont/EpdFontFamily.cpp +++ b/lib/EpdFont/EpdFontFamily.cpp @@ -1,23 +1,19 @@ #include "EpdFontFamily.h" const EpdFont* EpdFontFamily::getFont(const Style 
style) const { - if (style == BOLD && bold) { + // Extract font style bits (ignore UNDERLINE bit for font selection) + const bool hasBold = (style & BOLD) != 0; + const bool hasItalic = (style & ITALIC) != 0; + + if (hasBold && hasItalic) { + if (boldItalic) return boldItalic; + if (bold) return bold; + if (italic) return italic; + } else if (hasBold && bold) { return bold; - } - if (style == ITALIC && italic) { + } else if (hasItalic && italic) { return italic; } - if (style == BOLD_ITALIC) { - if (boldItalic) { - return boldItalic; - } - if (bold) { - return bold; - } - if (italic) { - return italic; - } - } return regular; } diff --git a/lib/EpdFont/EpdFontFamily.h b/lib/EpdFont/EpdFontFamily.h index 92043d1f..64fd9953 100644 --- a/lib/EpdFont/EpdFontFamily.h +++ b/lib/EpdFont/EpdFontFamily.h @@ -3,7 +3,7 @@ class EpdFontFamily { public: - enum Style : uint8_t { REGULAR = 0, BOLD = 1, ITALIC = 2, BOLD_ITALIC = 3 }; + enum Style : uint8_t { REGULAR = 0, BOLD = 1, ITALIC = 2, BOLD_ITALIC = 3, UNDERLINE = 4 }; explicit EpdFontFamily(const EpdFont* regular, const EpdFont* bold = nullptr, const EpdFont* italic = nullptr, const EpdFont* boldItalic = nullptr) diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 7559e3b3..5dc02358 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -86,6 +86,10 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { tocNavItem = opfParser.tocNavPath; } + if (!opfParser.cssFiles.empty()) { + cssFiles = opfParser.cssFiles; + } + Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; } @@ -204,15 +208,91 @@ bool Epub::parseTocNavFile() const { return true; } +std::string Epub::getCssRulesCache() const { return cachePath + "/css_rules.cache"; } + +bool Epub::loadCssRulesFromCache() const { + FsFile cssCacheFile; + if (SdMan.openFileForRead("EBP", getCssRulesCache(), cssCacheFile)) { + if (cssParser->loadFromCache(cssCacheFile)) { + cssCacheFile.close(); + Serial.printf("[%lu] 
[EBP] Loaded CSS rules from cache\n", millis()); + return true; + } + cssCacheFile.close(); + Serial.printf("[%lu] [EBP] CSS cache invalid, reparsing\n", millis()); + } + return false; +} + +void Epub::parseCssFiles() const { + if (cssFiles.empty()) { + Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis()); + } + + // Try to load from CSS cache first + if (!loadCssRulesFromCache()) { + // Cache miss - parse CSS files + for (const auto& cssPath : cssFiles) { + Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str()); + + // Extract CSS file to temp location + const auto tmpCssPath = getCachePath() + "/.tmp.css"; + FsFile tempCssFile; + if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis()); + continue; + } + if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) { + Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str()); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + tempCssFile.close(); + + // Parse the CSS file + if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis()); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + cssParser->loadFromStream(tempCssFile); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + } + + // Save to cache for next time + FsFile cssCacheFile; + if (SdMan.openFileForWrite("EBP", getCssRulesCache(), cssCacheFile)) { + cssParser->saveToCache(cssCacheFile); + cssCacheFile.close(); + } + + Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(), + cssFiles.size()); + } +} + // load in the meta data for the epub file -bool Epub::load(const bool buildIfMissing) { +bool Epub::load(const bool buildIfMissing, const bool skipLoadingCss) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", 
millis(), filepath.c_str()); // Initialize spine/TOC cache bookMetadataCache.reset(new BookMetadataCache(cachePath)); + // Always create CssParser - needed for inline style parsing even without CSS files + cssParser.reset(new CssParser()); // Try to load existing cache first if (bookMetadataCache->load()) { + if (!skipLoadingCss && !loadCssRulesFromCache()) { + Serial.printf("[%lu] [EBP] Warning: CSS rules cache not found, attempting to parse CSS files\n", millis()); + // to get CSS file list + if (!parseContentOpf(bookMetadataCache->coreMetadata)) { + Serial.printf("[%lu] [EBP] Could not parse content.opf from cached bookMetadata for CSS files\n", millis()); + // continue anyway - book will work without CSS and we'll still load any inline style CSS + } + parseCssFiles(); + } Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } @@ -309,6 +389,11 @@ bool Epub::load(const bool buildIfMissing) { return false; } + if (!skipLoadingCss) { + // Parse CSS files after cache reload + parseCssFiles(); + } + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 7a21efd5..85a601a5 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -8,6 +8,7 @@ #include #include "Epub/BookMetadataCache.h" +#include "Epub/css/CssParser.h" class ZipFile; @@ -24,11 +25,18 @@ class Epub { std::string cachePath; // Spine and TOC cache std::unique_ptr bookMetadataCache; + // CSS parser for styling + std::unique_ptr cssParser; + // CSS files + std::vector cssFiles; bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; bool parseTocNavFile() const; + void parseCssFiles() const; + std::string getCssRulesCache() const; + bool loadCssRulesFromCache() const; public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -37,7 
+45,7 @@ class Epub { } ~Epub() = default; std::string& getBasePath() { return contentBasePath; } - bool load(bool buildIfMissing = true); + bool load(bool buildIfMissing = true, bool skipLoadingCss = false); bool clearCache() const; void setupCacheDir() const; const std::string& getCachePath() const; @@ -64,4 +72,5 @@ class Epub { size_t getBookSize() const; float calculateProgress(int currentSpineIndex, float currentSpineRead) const; + const CssParser* getCssParser() const { return cssParser.get(); } }; diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 81d688ec..aca85581 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -49,11 +49,15 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s } // namespace -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style style, const bool underline) { if (word.empty()) return; words.push_back(std::move(word)); - wordStyles.push_back(fontStyle); + EpdFontFamily::Style combinedStyle = style; + if (underline) { + combinedStyle = static_cast(combinedStyle | EpdFontFamily::UNDERLINE); + } + wordStyles.push_back(combinedStyle); } // Consumes data to minimize memory usage @@ -109,10 +113,19 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c return {}; } + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; + // Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation. 
for (size_t i = 0; i < wordWidths.size(); ++i) { - while (wordWidths[i] > pageWidth) { - if (!hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { + // First word needs to fit in reduced width if there's an indent + const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; + while (wordWidths[i] > effectiveWidth) { + if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { break; } } @@ -133,11 +146,14 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c int currlen = -spaceWidth; dp[i] = MAX_COST; + // First line has reduced width due to text-indent + const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; + for (size_t j = i; j < totalWordCount; ++j) { // Current line length: previous width + space + current word width currlen += wordWidths[j] + spaceWidth; - if (currlen > pageWidth) { + if (currlen > effectivePageWidth) { break; } @@ -145,7 +161,7 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c if (j == totalWordCount - 1) { cost = 0; // Last line } else { - const int remainingSpace = pageWidth - currlen; + const int remainingSpace = effectivePageWidth - currlen; // Use long long for the square to prevent overflow const long long cost_ll = static_cast(remainingSpace) * remainingSpace + dp[j + 1]; @@ -200,7 +216,11 @@ void ParsedText::applyParagraphIndent() { return; } - if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) { + if (blockStyle.textIndentDefined) { + // CSS text-indent is explicitly set (even if 0) - don't use fallback EmSpace + // The actual indent positioning is handled in extractLine() + } else if (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) { + // No CSS text-indent defined - use EmSpace fallback for visual indent words.front().insert(0, "\xe2\x80\x83"); } } @@ -209,13 +229,24 @@ void 
ParsedText::applyParagraphIndent() { std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth, const int spaceWidth, std::vector& wordWidths) { + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; + std::vector lineBreakIndices; size_t currentIndex = 0; + bool isFirstLine = true; while (currentIndex < wordWidths.size()) { const size_t lineStart = currentIndex; int lineWidth = 0; + // First line has reduced width due to text-indent + const int effectivePageWidth = isFirstLine ? pageWidth - firstLineIndent : pageWidth; + // Consume as many words as possible for current line, splitting when prefixes fit while (currentIndex < wordWidths.size()) { const bool isFirstWord = currentIndex == lineStart; @@ -223,14 +254,14 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r const int candidateWidth = spacing + wordWidths[currentIndex]; // Word fits on current line - if (lineWidth + candidateWidth <= pageWidth) { + if (lineWidth + candidateWidth <= effectivePageWidth) { lineWidth += candidateWidth; ++currentIndex; continue; } // Word would overflow — try to split based on hyphenation points - const int availableWidth = pageWidth - lineWidth - spacing; + const int availableWidth = effectivePageWidth - lineWidth - spacing; const bool allowFallbackBreaks = isFirstWord; // Only for first word on line if (availableWidth > 0 && @@ -250,6 +281,7 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r } lineBreakIndices.push_back(currentIndex); + isFirstLine = false; } return lineBreakIndices; @@ -334,27 +366,36 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const const size_t lastBreakAt = 
breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lineWordCount = lineBreak - lastBreakAt; + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const bool isFirstLine = breakIndex == 0; + const int firstLineIndent = + isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; + // Calculate total word width for this line int lineWordWidthSum = 0; for (size_t i = lastBreakAt; i < lineBreak; i++) { lineWordWidthSum += wordWidths[i]; } - // Calculate spacing - const int spareSpace = pageWidth - lineWordWidthSum; + // Calculate spacing (account for indent reducing effective page width on first line) + const int effectivePageWidth = pageWidth - firstLineIndent; + const int spareSpace = effectivePageWidth - lineWordWidthSum; int spacing = spaceWidth; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { + if (blockStyle.alignment == CssTextAlign::Justify && !isLastLine && lineWordCount >= 2) { spacing = spareSpace / (lineWordCount - 1); } - // Calculate initial x position - uint16_t xpos = 0; - if (style == TextBlock::RIGHT_ALIGN) { + // Calculate initial x position (first line starts at indent for left/justified text) + auto xpos = static_cast(firstLineIndent); + if (blockStyle.alignment == CssTextAlign::Right) { xpos = spareSpace - (lineWordCount - 1) * spaceWidth; - } else if (style == TextBlock::CENTER_ALIGN) { + } else if (blockStyle.alignment == CssTextAlign::Center) { xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; } @@ -384,5 +425,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const } } - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); -} \ No newline at end of file + 
processLine( + std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle)); +} diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index e72db7ef..a13d13b5 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -8,6 +8,7 @@ #include #include +#include "blocks/BlockStyle.h" #include "blocks/TextBlock.h" class GfxRenderer; @@ -15,7 +16,7 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; - TextBlock::Style style; + BlockStyle blockStyle; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -32,14 +33,14 @@ class ParsedText { std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: - explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, - const bool hyphenationEnabled = false) - : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} + explicit ParsedText(const bool extraParagraphSpacing, const bool hyphenationEnabled = false, + const BlockStyle& blockStyle = BlockStyle()) + : blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; - void addWord(std::string word, EpdFontFamily::Style fontStyle); - void setStyle(const TextBlock::Style style) { this->style = style; } - TextBlock::Style getStyle() const { return style; } + void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } + BlockStyle& getBlockStyle() { return blockStyle; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index cf67108b..9cb70027 100644 --- a/lib/Epub/Epub/Section.cpp +++ 
b/lib/Epub/Epub/Section.cpp @@ -8,7 +8,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 10; +constexpr uint8_t SECTION_FILE_VERSION = 11; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(uint32_t); @@ -179,7 +179,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, - [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, popupFn); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, popupFn, + epub->getCssParser()); Hyphenator::setPreferredLanguage(epub->getLanguage()); success = visitor.parseAndBuildPages(); diff --git a/lib/Epub/Epub/blocks/BlockStyle.h b/lib/Epub/Epub/blocks/BlockStyle.h new file mode 100644 index 00000000..5c26a21d --- /dev/null +++ b/lib/Epub/Epub/blocks/BlockStyle.h @@ -0,0 +1,90 @@ +#pragma once + +#include + +#include "Epub/css/CssStyle.h" + +/** + * BlockStyle - Block-level styling properties + */ +struct BlockStyle { + CssTextAlign alignment = CssTextAlign::Justify; + + // Spacing (in pixels) + int16_t marginTop = 0; + int16_t marginBottom = 0; + int16_t marginLeft = 0; + int16_t marginRight = 0; + int16_t paddingTop = 0; // treated same as margin for rendering + int16_t paddingBottom = 0; // treated same as margin for rendering + int16_t paddingLeft = 0; // treated same as margin for rendering + int16_t paddingRight = 0; // treated same as margin for rendering + int16_t textIndent = 0; + bool textIndentDefined = false; // true if text-indent was explicitly set in CSS + bool textAlignDefined = false; // true if text-align was explicitly set in CSS + + // 
Combined horizontal insets (margin + padding) + [[nodiscard]] int16_t leftInset() const { return marginLeft + paddingLeft; } + [[nodiscard]] int16_t rightInset() const { return marginRight + paddingRight; } + [[nodiscard]] int16_t totalHorizontalInset() const { return leftInset() + rightInset(); } + + // Combine with another block style. Useful for parent -> child styles, where the child style should be + // applied on top of the parent's style to get the combined style. + BlockStyle getCombinedBlockStyle(const BlockStyle& child) const { + BlockStyle combinedBlockStyle; + + combinedBlockStyle.marginTop = static_cast(child.marginTop + marginTop); + combinedBlockStyle.marginBottom = static_cast(child.marginBottom + marginBottom); + combinedBlockStyle.marginLeft = static_cast(child.marginLeft + marginLeft); + combinedBlockStyle.marginRight = static_cast(child.marginRight + marginRight); + + combinedBlockStyle.paddingTop = static_cast(child.paddingTop + paddingTop); + combinedBlockStyle.paddingBottom = static_cast(child.paddingBottom + paddingBottom); + combinedBlockStyle.paddingLeft = static_cast(child.paddingLeft + paddingLeft); + combinedBlockStyle.paddingRight = static_cast(child.paddingRight + paddingRight); + // Text indent: use child's if defined + if (child.textIndentDefined) { + combinedBlockStyle.textIndent = child.textIndent; + combinedBlockStyle.textIndentDefined = true; + } else { + combinedBlockStyle.textIndent = textIndent; + combinedBlockStyle.textIndentDefined = textIndentDefined; + } + // Text align: use child's if defined + if (child.textAlignDefined) { + combinedBlockStyle.alignment = child.alignment; + combinedBlockStyle.textAlignDefined = true; + } else { + combinedBlockStyle.alignment = alignment; + combinedBlockStyle.textAlignDefined = textAlignDefined; + } + return combinedBlockStyle; + } + + // Create a BlockStyle from CSS style properties, resolving CssLength values to pixels + // emSize is the current font line height, used for em/rem unit 
conversion + // paragraphAlignment is the user's paragraphAlignment setting preference + static BlockStyle fromCssStyle(const CssStyle& cssStyle, const float emSize, const CssTextAlign paragraphAlignment) { + BlockStyle blockStyle; + // Resolve all CssLength values to pixels using the current font's em size + blockStyle.marginTop = cssStyle.marginTop.toPixelsInt16(emSize); + blockStyle.marginBottom = cssStyle.marginBottom.toPixelsInt16(emSize); + blockStyle.marginLeft = cssStyle.marginLeft.toPixelsInt16(emSize); + blockStyle.marginRight = cssStyle.marginRight.toPixelsInt16(emSize); + + blockStyle.paddingTop = cssStyle.paddingTop.toPixelsInt16(emSize); + blockStyle.paddingBottom = cssStyle.paddingBottom.toPixelsInt16(emSize); + blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); + blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); + + blockStyle.textIndent = cssStyle.textIndent.toPixelsInt16(emSize); + blockStyle.textIndentDefined = cssStyle.hasTextIndent(); + blockStyle.textAlignDefined = cssStyle.hasTextAlign(); + if (blockStyle.textAlignDefined) { + blockStyle.alignment = cssStyle.textAlign; + } else { + blockStyle.alignment = paragraphAlignment; + } + return blockStyle; + } +}; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 2a15aef0..3ab25558 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -14,9 +14,32 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); - for (size_t i = 0; i < words.size(); i++) { - renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt); + const int wordX = *wordXposIt + x; + const EpdFontFamily::Style currentStyle = *wordStylesIt; + renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle); + + if ((currentStyle & EpdFontFamily::UNDERLINE) != 
0) { + const std::string& w = *wordIt; + const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle); + // y is the top of the text line; add ascender to reach baseline, then offset 2px below + const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; + + int startX = wordX; + int underlineWidth = fullWordWidth; + + // if word starts with em-space ("\xe2\x80\x83"), account for the additional indent before drawing the line + if (w.size() >= 3 && static_cast(w[0]) == 0xE2 && static_cast(w[1]) == 0x80 && + static_cast(w[2]) == 0x83) { + const char* visiblePtr = w.c_str() + 3; + const int prefixWidth = renderer.getTextAdvanceX(fontId, std::string("\xe2\x80\x83").c_str()); + const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, currentStyle); + startX = wordX + prefixWidth; + underlineWidth = visibleWidth; + } + + renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true); + } std::advance(wordIt, 1); std::advance(wordStylesIt, 1); @@ -37,8 +60,19 @@ bool TextBlock::serialize(FsFile& file) const { for (auto x : wordXpos) serialization::writePod(file, x); for (auto s : wordStyles) serialization::writePod(file, s); - // Block style - serialization::writePod(file, style); + // Style (alignment + margins/padding/indent) + serialization::writePod(file, blockStyle.alignment); + serialization::writePod(file, blockStyle.textAlignDefined); + serialization::writePod(file, blockStyle.marginTop); + serialization::writePod(file, blockStyle.marginBottom); + serialization::writePod(file, blockStyle.marginLeft); + serialization::writePod(file, blockStyle.marginRight); + serialization::writePod(file, blockStyle.paddingTop); + serialization::writePod(file, blockStyle.paddingBottom); + serialization::writePod(file, blockStyle.paddingLeft); + serialization::writePod(file, blockStyle.paddingRight); + serialization::writePod(file, blockStyle.textIndent); + serialization::writePod(file, blockStyle.textIndentDefined); return 
true; } @@ -48,7 +82,7 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { std::list words; std::list wordXpos; std::list wordStyles; - Style style; + BlockStyle blockStyle; // Word count serialization::readPod(file, wc); @@ -67,8 +101,20 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& s : wordStyles) serialization::readPod(file, s); - // Block style - serialization::readPod(file, style); + // Style (alignment + margins/padding/indent) + serialization::readPod(file, blockStyle.alignment); + serialization::readPod(file, blockStyle.textAlignDefined); + serialization::readPod(file, blockStyle.marginTop); + serialization::readPod(file, blockStyle.marginBottom); + serialization::readPod(file, blockStyle.marginLeft); + serialization::readPod(file, blockStyle.marginRight); + serialization::readPod(file, blockStyle.paddingTop); + serialization::readPod(file, blockStyle.paddingBottom); + serialization::readPod(file, blockStyle.paddingLeft); + serialization::readPod(file, blockStyle.paddingRight); + serialization::readPod(file, blockStyle.textIndent); + serialization::readPod(file, blockStyle.textIndentDefined); - return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style)); + return std::unique_ptr( + new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), blockStyle)); } diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 415a18f3..e233f77f 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -7,30 +7,26 @@ #include #include "Block.h" +#include "BlockStyle.h" // Represents a line of text on a page class TextBlock final : public Block { - public: - enum Style : uint8_t { - JUSTIFIED = 0, - LEFT_ALIGN = 1, - CENTER_ALIGN = 2, - RIGHT_ALIGN = 3, - }; - private: std::list words; std::list wordXpos; std::list wordStyles; - Style style; + BlockStyle blockStyle; 
public: explicit TextBlock(std::list words, std::list word_xpos, - std::list word_styles, const Style style) - : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {} + std::list word_styles, const BlockStyle& blockStyle = BlockStyle()) + : words(std::move(words)), + wordXpos(std::move(word_xpos)), + wordStyles(std::move(word_styles)), + blockStyle(blockStyle) {} ~TextBlock() override = default; - void setStyle(const Style style) { this->style = style; } - Style getStyle() const { return style; } + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } + const BlockStyle& getBlockStyle() const { return blockStyle; } bool isEmpty() override { return words.empty(); } void layout(GfxRenderer& renderer) override {}; // given a renderer works out where to break the words into lines diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp new file mode 100644 index 00000000..d51ebba7 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -0,0 +1,697 @@ +#include "CssParser.h" + +#include + +#include +#include + +namespace { + +// Buffer size for reading CSS files +constexpr size_t READ_BUFFER_SIZE = 512; + +// Maximum CSS file size we'll process (prevent memory issues) +constexpr size_t MAX_CSS_SIZE = 64 * 1024; + +// Check if character is CSS whitespace +bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; } + +// Read entire file into string (with size limit) +std::string readFileContent(FsFile& file) { + std::string content; + content.reserve(std::min(static_cast(file.size()), MAX_CSS_SIZE)); + + char buffer[READ_BUFFER_SIZE]; + while (file.available() && content.size() < MAX_CSS_SIZE) { + const int bytesRead = file.read(buffer, sizeof(buffer)); + if (bytesRead <= 0) break; + content.append(buffer, bytesRead); + } + return content; +} + +// Remove CSS comments (/* ... 
*/) from content +std::string stripComments(const std::string& css) { + std::string result; + result.reserve(css.size()); + + size_t pos = 0; + while (pos < css.size()) { + // Look for start of comment + if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') { + // Find end of comment + const size_t endPos = css.find("*/", pos + 2); + if (endPos == std::string::npos) { + // Unterminated comment - skip rest of file + break; + } + pos = endPos + 2; + } else { + result.push_back(css[pos]); + ++pos; + } + } + return result; +} + +// Skip @-rules (like @media, @import, @font-face) +// Returns position after the @-rule +size_t skipAtRule(const std::string& css, const size_t start) { + // Find the end - either semicolon (simple @-rule) or matching brace + size_t pos = start + 1; // Skip the '@' + + // Skip identifier + while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) { + ++pos; + } + + // Look for { or ; + int braceDepth = 0; + while (pos < css.size()) { + const char c = css[pos]; + if (c == '{') { + ++braceDepth; + } else if (c == '}') { + --braceDepth; + if (braceDepth == 0) { + return pos + 1; + } + } else if (c == ';' && braceDepth == 0) { + return pos + 1; + } + ++pos; + } + return css.size(); +} + +// Extract next rule from CSS content +// Returns true if a rule was found, with selector and body filled +bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) { + selector.clear(); + body.clear(); + + // Skip whitespace and @-rules until we find a regular rule + while (pos < css.size()) { + // Skip whitespace + while (pos < css.size() && isCssWhitespace(css[pos])) { + ++pos; + } + + if (pos >= css.size()) return false; + + // Handle @-rules iteratively (avoids recursion/stack overflow) + if (css[pos] == '@') { + pos = skipAtRule(css, pos); + continue; // Try again after skipping the @-rule + } + + break; // Found start of a regular rule + } + + if (pos >= css.size()) return false; + + // 
Find opening brace + const size_t bracePos = css.find('{', pos); + if (bracePos == std::string::npos) return false; + + // Extract selector (everything before the brace) + selector = css.substr(pos, bracePos - pos); + + // Find matching closing brace + int depth = 1; + const size_t bodyStart = bracePos + 1; + size_t bodyEnd = bodyStart; + + while (bodyEnd < css.size() && depth > 0) { + if (css[bodyEnd] == '{') + ++depth; + else if (css[bodyEnd] == '}') + --depth; + ++bodyEnd; + } + + // Extract body (between braces) + if (bodyEnd > bodyStart) { + body = css.substr(bodyStart, bodyEnd - bodyStart - 1); + } + + pos = bodyEnd; + return true; +} + +} // anonymous namespace + +// String utilities implementation + +std::string CssParser::normalized(const std::string& s) { + std::string result; + result.reserve(s.size()); + + bool inSpace = true; // Start true to skip leading space + for (const char c : s) { + if (isCssWhitespace(c)) { + if (!inSpace) { + result.push_back(' '); + inSpace = true; + } + } else { + result.push_back(static_cast(std::tolower(static_cast(c)))); + inSpace = false; + } + } + + // Remove trailing space + if (!result.empty() && result.back() == ' ') { + result.pop_back(); + } + return result; +} + +std::vector CssParser::splitOnChar(const std::string& s, const char delimiter) { + std::vector parts; + size_t start = 0; + + for (size_t i = 0; i <= s.size(); ++i) { + if (i == s.size() || s[i] == delimiter) { + std::string part = s.substr(start, i - start); + std::string trimmed = normalized(part); + if (!trimmed.empty()) { + parts.push_back(trimmed); + } + start = i + 1; + } + } + return parts; +} + +std::vector CssParser::splitWhitespace(const std::string& s) { + std::vector parts; + size_t start = 0; + bool inWord = false; + + for (size_t i = 0; i <= s.size(); ++i) { + const bool isSpace = i == s.size() || isCssWhitespace(s[i]); + if (isSpace && inWord) { + parts.push_back(s.substr(start, i - start)); + inWord = false; + } else if (!isSpace && 
!inWord) { + start = i; + inWord = true; + } + } + return parts; +} + +// Property value interpreters + +CssTextAlign CssParser::interpretAlignment(const std::string& val) { + const std::string v = normalized(val); + + if (v == "left" || v == "start") return CssTextAlign::Left; + if (v == "right" || v == "end") return CssTextAlign::Right; + if (v == "center") return CssTextAlign::Center; + if (v == "justify") return CssTextAlign::Justify; + + return CssTextAlign::Left; +} + +CssFontStyle CssParser::interpretFontStyle(const std::string& val) { + const std::string v = normalized(val); + + if (v == "italic" || v == "oblique") return CssFontStyle::Italic; + return CssFontStyle::Normal; +} + +CssFontWeight CssParser::interpretFontWeight(const std::string& val) { + const std::string v = normalized(val); + + // Named values + if (v == "bold" || v == "bolder") return CssFontWeight::Bold; + if (v == "normal" || v == "lighter") return CssFontWeight::Normal; + + // Numeric values: 100-900 + // CSS spec: 400 = normal, 700 = bold + // We use: 0-400 = normal, 700+ = bold, 500-600 = normal (conservative) + char* endPtr = nullptr; + const long numericWeight = std::strtol(v.c_str(), &endPtr, 10); + + // If we parsed a number and consumed the whole string + if (endPtr != v.c_str() && *endPtr == '\0') { + return numericWeight >= 700 ? 
CssFontWeight::Bold : CssFontWeight::Normal; + } + + return CssFontWeight::Normal; +} + +CssTextDecoration CssParser::interpretDecoration(const std::string& val) { + const std::string v = normalized(val); + + // text-decoration can have multiple space-separated values + if (v.find("underline") != std::string::npos) { + return CssTextDecoration::Underline; + } + return CssTextDecoration::None; +} + +CssLength CssParser::interpretLength(const std::string& val) { + const std::string v = normalized(val); + if (v.empty()) return CssLength{}; + + // Find where the number ends + size_t unitStart = v.size(); + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + // Parse numeric value + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + if (endPtr == numPart.c_str()) return CssLength{}; // No number parsed + + // Determine unit type (preserve for deferred resolution) + auto unit = CssUnit::Pixels; + if (unitPart == "em") { + unit = CssUnit::Em; + } else if (unitPart == "rem") { + unit = CssUnit::Rem; + } else if (unitPart == "pt") { + unit = CssUnit::Points; + } + // px and unitless default to Pixels + + return CssLength{numericValue, unit}; +} + +int8_t CssParser::interpretSpacing(const std::string& val) { + const std::string v = normalized(val); + if (v.empty()) return 0; + + // For spacing, we convert to "lines" (discrete units for e-ink) + // 1em ≈ 1 line, percentages based on ~30 lines per page + + float multiplier = 0.0f; + size_t unitStart = v.size(); + + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' 
&& c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + if (unitPart == "em" || unitPart == "rem") { + multiplier = 1.0f; // 1em = 1 line + } else if (unitPart == "%") { + multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines + } else { + return 0; // Unsupported unit for spacing + } + + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + + if (endPtr == numPart.c_str()) return 0; + + int lines = static_cast(numericValue * multiplier); + + // Clamp to reasonable range (0-2 lines) + if (lines < 0) lines = 0; + if (lines > 2) lines = 2; + + return static_cast(lines); +} + +// Declaration parsing + +CssStyle CssParser::parseDeclarations(const std::string& declBlock) { + CssStyle style; + + // Split declarations by semicolon + const auto declarations = splitOnChar(declBlock, ';'); + + for (const auto& decl : declarations) { + // Find colon separator + const size_t colonPos = decl.find(':'); + if (colonPos == std::string::npos || colonPos == 0) continue; + + std::string propName = normalized(decl.substr(0, colonPos)); + std::string propValue = normalized(decl.substr(colonPos + 1)); + + if (propName.empty() || propValue.empty()) continue; + + // Match property and set value + if (propName == "text-align") { + style.textAlign = interpretAlignment(propValue); + style.defined.textAlign = 1; + } else if (propName == "font-style") { + style.fontStyle = interpretFontStyle(propValue); + style.defined.fontStyle = 1; + } else if (propName == "font-weight") { + style.fontWeight = interpretFontWeight(propValue); + style.defined.fontWeight = 1; + } else if (propName == "text-decoration" || propName == "text-decoration-line") { + style.textDecoration = interpretDecoration(propValue); + style.defined.textDecoration = 1; + } else if (propName == "text-indent") { + style.textIndent = interpretLength(propValue); + 
style.defined.textIndent = 1; + } else if (propName == "margin-top") { + style.marginTop = interpretLength(propValue); + style.defined.marginTop = 1; + } else if (propName == "margin-bottom") { + style.marginBottom = interpretLength(propValue); + style.defined.marginBottom = 1; + } else if (propName == "margin-left") { + style.marginLeft = interpretLength(propValue); + style.defined.marginLeft = 1; + } else if (propName == "margin-right") { + style.marginRight = interpretLength(propValue); + style.defined.marginRight = 1; + } else if (propName == "margin") { + // Shorthand: 1-4 values for top, right, bottom, left + const auto values = splitWhitespace(propValue); + if (!values.empty()) { + style.marginTop = interpretLength(values[0]); + style.marginRight = values.size() >= 2 ? interpretLength(values[1]) : style.marginTop; + style.marginBottom = values.size() >= 3 ? interpretLength(values[2]) : style.marginTop; + style.marginLeft = values.size() >= 4 ? interpretLength(values[3]) : style.marginRight; + style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; + } + } else if (propName == "padding-top") { + style.paddingTop = interpretLength(propValue); + style.defined.paddingTop = 1; + } else if (propName == "padding-bottom") { + style.paddingBottom = interpretLength(propValue); + style.defined.paddingBottom = 1; + } else if (propName == "padding-left") { + style.paddingLeft = interpretLength(propValue); + style.defined.paddingLeft = 1; + } else if (propName == "padding-right") { + style.paddingRight = interpretLength(propValue); + style.defined.paddingRight = 1; + } else if (propName == "padding") { + // Shorthand: 1-4 values for top, right, bottom, left + const auto values = splitWhitespace(propValue); + if (!values.empty()) { + style.paddingTop = interpretLength(values[0]); + style.paddingRight = values.size() >= 2 ? interpretLength(values[1]) : style.paddingTop; + style.paddingBottom = values.size() >= 3 ? 
interpretLength(values[2]) : style.paddingTop; + style.paddingLeft = values.size() >= 4 ? interpretLength(values[3]) : style.paddingRight; + style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = + style.defined.paddingLeft = 1; + } + } + } + + return style; +} + +// Rule processing + +void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) { + const CssStyle style = parseDeclarations(declarations); + + // Only store if any properties were set + if (!style.defined.anySet()) return; + + // Handle comma-separated selectors + const auto selectors = splitOnChar(selectorGroup, ','); + + for (const auto& sel : selectors) { + // Normalize the selector + std::string key = normalized(sel); + if (key.empty()) continue; + + // Store or merge with existing + auto it = rulesBySelector_.find(key); + if (it != rulesBySelector_.end()) { + it->second.applyOver(style); + } else { + rulesBySelector_[key] = style; + } + } +} + +// Main parsing entry point + +bool CssParser::loadFromStream(FsFile& source) { + if (!source) { + Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis()); + return false; + } + + // Read file content + const std::string content = readFileContent(source); + if (content.empty()) { + return true; // Empty file is valid + } + + // Remove comments + const std::string cleaned = stripComments(content); + + // Parse rules + size_t pos = 0; + std::string selector, body; + + while (extractNextRule(cleaned, pos, selector, body)) { + processRuleBlock(selector, body); + } + + Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size()); + return true; +} + +// Style resolution + +CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const { + CssStyle result; + const std::string tag = normalized(tagName); + + // 1. 
Apply element-level style (lowest priority) + const auto tagIt = rulesBySelector_.find(tag); + if (tagIt != rulesBySelector_.end()) { + result.applyOver(tagIt->second); + } + + // 2. Apply class styles (medium priority) + if (!classAttr.empty()) { + const auto classes = splitWhitespace(classAttr); + + for (const auto& cls : classes) { + std::string classKey = "." + normalized(cls); + + auto classIt = rulesBySelector_.find(classKey); + if (classIt != rulesBySelector_.end()) { + result.applyOver(classIt->second); + } + } + + // 3. Apply element.class styles (higher priority) + for (const auto& cls : classes) { + std::string combinedKey = tag + "." + normalized(cls); + + auto combinedIt = rulesBySelector_.find(combinedKey); + if (combinedIt != rulesBySelector_.end()) { + result.applyOver(combinedIt->second); + } + } + } + + return result; +} + +// Inline style parsing (static - doesn't need rule database) + +CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); } + +// Cache serialization + +// Cache format version - increment when format changes +constexpr uint8_t CSS_CACHE_VERSION = 1; + +bool CssParser::saveToCache(FsFile& file) const { + if (!file) { + return false; + } + + // Write version + file.write(CSS_CACHE_VERSION); + + // Write rule count + const auto ruleCount = static_cast(rulesBySelector_.size()); + file.write(reinterpret_cast(&ruleCount), sizeof(ruleCount)); + + // Write each rule: selector string + CssStyle fields + for (const auto& pair : rulesBySelector_) { + // Write selector string (length-prefixed) + const auto selectorLen = static_cast(pair.first.size()); + file.write(reinterpret_cast(&selectorLen), sizeof(selectorLen)); + file.write(reinterpret_cast(pair.first.data()), selectorLen); + + // Write CssStyle fields (all are POD types) + const CssStyle& style = pair.second; + file.write(static_cast(style.textAlign)); + file.write(static_cast(style.fontStyle)); + 
file.write(static_cast(style.fontWeight)); + file.write(static_cast(style.textDecoration)); + + // Write CssLength fields (value + unit) + auto writeLength = [&file](const CssLength& len) { + file.write(reinterpret_cast(&len.value), sizeof(len.value)); + file.write(static_cast(len.unit)); + }; + + writeLength(style.textIndent); + writeLength(style.marginTop); + writeLength(style.marginBottom); + writeLength(style.marginLeft); + writeLength(style.marginRight); + writeLength(style.paddingTop); + writeLength(style.paddingBottom); + writeLength(style.paddingLeft); + writeLength(style.paddingRight); + + // Write defined flags as uint16_t + uint16_t definedBits = 0; + if (style.defined.textAlign) definedBits |= 1 << 0; + if (style.defined.fontStyle) definedBits |= 1 << 1; + if (style.defined.fontWeight) definedBits |= 1 << 2; + if (style.defined.textDecoration) definedBits |= 1 << 3; + if (style.defined.textIndent) definedBits |= 1 << 4; + if (style.defined.marginTop) definedBits |= 1 << 5; + if (style.defined.marginBottom) definedBits |= 1 << 6; + if (style.defined.marginLeft) definedBits |= 1 << 7; + if (style.defined.marginRight) definedBits |= 1 << 8; + if (style.defined.paddingTop) definedBits |= 1 << 9; + if (style.defined.paddingBottom) definedBits |= 1 << 10; + if (style.defined.paddingLeft) definedBits |= 1 << 11; + if (style.defined.paddingRight) definedBits |= 1 << 12; + file.write(reinterpret_cast(&definedBits), sizeof(definedBits)); + } + + Serial.printf("[%lu] [CSS] Saved %u rules to cache\n", millis(), ruleCount); + return true; +} + +bool CssParser::loadFromCache(FsFile& file) { + if (!file) { + return false; + } + + // Clear existing rules + clear(); + + // Read and verify version + uint8_t version = 0; + if (file.read(&version, 1) != 1 || version != CSS_CACHE_VERSION) { + Serial.printf("[%lu] [CSS] Cache version mismatch (got %u, expected %u)\n", millis(), version, CSS_CACHE_VERSION); + return false; + } + + // Read rule count + uint16_t ruleCount = 0; 
+ if (file.read(&ruleCount, sizeof(ruleCount)) != sizeof(ruleCount)) { + return false; + } + + // Read each rule + for (uint16_t i = 0; i < ruleCount; ++i) { + // Read selector string + uint16_t selectorLen = 0; + if (file.read(&selectorLen, sizeof(selectorLen)) != sizeof(selectorLen)) { + rulesBySelector_.clear(); + return false; + } + + std::string selector; + selector.resize(selectorLen); + if (file.read(&selector[0], selectorLen) != selectorLen) { + rulesBySelector_.clear(); + return false; + } + + // Read CssStyle fields + CssStyle style; + uint8_t enumVal; + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.textAlign = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.fontStyle = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.fontWeight = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.textDecoration = static_cast(enumVal); + + // Read CssLength fields + auto readLength = [&file](CssLength& len) -> bool { + if (file.read(&len.value, sizeof(len.value)) != sizeof(len.value)) { + return false; + } + uint8_t unitVal; + if (file.read(&unitVal, 1) != 1) { + return false; + } + len.unit = static_cast(unitVal); + return true; + }; + + if (!readLength(style.textIndent) || !readLength(style.marginTop) || !readLength(style.marginBottom) || + !readLength(style.marginLeft) || !readLength(style.marginRight) || !readLength(style.paddingTop) || + !readLength(style.paddingBottom) || !readLength(style.paddingLeft) || !readLength(style.paddingRight)) { + rulesBySelector_.clear(); + return false; + } + + // Read defined flags + uint16_t definedBits = 0; + if (file.read(&definedBits, sizeof(definedBits)) != sizeof(definedBits)) { + rulesBySelector_.clear(); + return false; + } + style.defined.textAlign = (definedBits & 1 << 0) != 0; + 
style.defined.fontStyle = (definedBits & 1 << 1) != 0; + style.defined.fontWeight = (definedBits & 1 << 2) != 0; + style.defined.textDecoration = (definedBits & 1 << 3) != 0; + style.defined.textIndent = (definedBits & 1 << 4) != 0; + style.defined.marginTop = (definedBits & 1 << 5) != 0; + style.defined.marginBottom = (definedBits & 1 << 6) != 0; + style.defined.marginLeft = (definedBits & 1 << 7) != 0; + style.defined.marginRight = (definedBits & 1 << 8) != 0; + style.defined.paddingTop = (definedBits & 1 << 9) != 0; + style.defined.paddingBottom = (definedBits & 1 << 10) != 0; + style.defined.paddingLeft = (definedBits & 1 << 11) != 0; + style.defined.paddingRight = (definedBits & 1 << 12) != 0; + + rulesBySelector_[selector] = style; + } + + Serial.printf("[%lu] [CSS] Loaded %u rules from cache\n", millis(), ruleCount); + return true; +} diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h new file mode 100644 index 00000000..0e5a1b34 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.h @@ -0,0 +1,114 @@ +#pragma once + +#include + +#include +#include +#include + +#include "CssStyle.h" + +/** + * Lightweight CSS parser for EPUB stylesheets + * + * Parses CSS files and extracts styling information relevant for e-ink display. + * Uses a two-phase approach: first tokenizes the CSS content, then builds + * a rule database that can be queried during HTML parsing. + * + * Supported selectors: + * - Element selectors: p, div, h1, etc. + * - Class selectors: .classname + * - Combined: element.classname + * - Grouped: selector1, selector2 { } + * + * Not supported (silently ignored): + * - Descendant/child selectors + * - Pseudo-classes and pseudo-elements + * - Media queries (content is skipped) + * - @import, @font-face, etc. 
+ */ +class CssParser { + public: + CssParser() = default; + ~CssParser() = default; + + // Non-copyable + CssParser(const CssParser&) = delete; + CssParser& operator=(const CssParser&) = delete; + + /** + * Load and parse CSS from a file stream. + * Can be called multiple times to accumulate rules from multiple stylesheets. + * @param source Open file handle to read from + * @return true if parsing completed (even if no rules found) + */ + bool loadFromStream(FsFile& source); + + /** + * Look up the style for an HTML element, considering tag name and class attributes. + * Applies CSS cascade: element style < class style < element.class style + * + * @param tagName The HTML element name (e.g., "p", "div") + * @param classAttr The class attribute value (may contain multiple space-separated classes) + * @return Combined style with all applicable rules merged + */ + [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const; + + /** + * Parse an inline style attribute string. + * @param styleValue The value of a style="" attribute + * @return Parsed style properties + */ + [[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue); + + /** + * Check if any rules have been loaded + */ + [[nodiscard]] bool empty() const { return rulesBySelector_.empty(); } + + /** + * Get count of loaded rule sets + */ + [[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); } + + /** + * Clear all loaded rules + */ + void clear() { rulesBySelector_.clear(); } + + /** + * Save parsed CSS rules to a cache file. + * @param file Open file handle to write to + * @return true if cache was written successfully + */ + bool saveToCache(FsFile& file) const; + + /** + * Load CSS rules from a cache file. + * Clears any existing rules before loading. 
+ * @param file Open file handle to read from + * @return true if cache was loaded successfully + */ + bool loadFromCache(FsFile& file); + + private: + // Storage: maps normalized selector -> style properties + std::unordered_map rulesBySelector_; + + // Internal parsing helpers + void processRuleBlock(const std::string& selectorGroup, const std::string& declarations); + static CssStyle parseDeclarations(const std::string& declBlock); + + // Individual property value parsers + static CssTextAlign interpretAlignment(const std::string& val); + static CssFontStyle interpretFontStyle(const std::string& val); + static CssFontWeight interpretFontWeight(const std::string& val); + static CssTextDecoration interpretDecoration(const std::string& val); + static CssLength interpretLength(const std::string& val); + static int8_t interpretSpacing(const std::string& val); + + // String utilities + static std::string normalized(const std::string& s); + static std::vector splitOnChar(const std::string& s, char delimiter); + static std::vector splitWhitespace(const std::string& s); +}; diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h new file mode 100644 index 00000000..7b83da3f --- /dev/null +++ b/lib/Epub/Epub/css/CssStyle.h @@ -0,0 +1,191 @@ +#pragma once + +#include + +// Matches order of PARAGRAPH_ALIGNMENT in CrossPointSettings +enum class CssTextAlign : uint8_t { Justify = 0, Left = 1, Center = 2, Right = 3 }; +enum class CssUnit : uint8_t { Pixels = 0, Em = 1, Rem = 2, Points = 3 }; + +// Represents a CSS length value with its unit, allowing deferred resolution to pixels +struct CssLength { + float value = 0.0f; + CssUnit unit = CssUnit::Pixels; + + CssLength() = default; + CssLength(const float v, const CssUnit u) : value(v), unit(u) {} + + // Convenience constructor for pixel values (most common case) + explicit CssLength(const float pixels) : value(pixels) {} + + // Resolve to pixels given the current em size (font line height) + [[nodiscard]] 
float toPixels(const float emSize) const { + switch (unit) { + case CssUnit::Em: + case CssUnit::Rem: + return value * emSize; + case CssUnit::Points: + return value * 1.33f; // Approximate pt to px conversion + default: + return value; + } + } + + // Resolve to int16_t pixels (for BlockStyle fields) + [[nodiscard]] int16_t toPixelsInt16(const float emSize) const { return static_cast(toPixels(emSize)); } +}; + +// Font style options matching CSS font-style property +enum class CssFontStyle : uint8_t { Normal = 0, Italic = 1 }; + +// Font weight options - CSS supports 100-900, we simplify to normal/bold +enum class CssFontWeight : uint8_t { Normal = 0, Bold = 1 }; + +// Text decoration options +enum class CssTextDecoration : uint8_t { None = 0, Underline = 1 }; + +// Bitmask for tracking which properties have been explicitly set +struct CssPropertyFlags { + uint16_t textAlign : 1; + uint16_t fontStyle : 1; + uint16_t fontWeight : 1; + uint16_t textDecoration : 1; + uint16_t textIndent : 1; + uint16_t marginTop : 1; + uint16_t marginBottom : 1; + uint16_t marginLeft : 1; + uint16_t marginRight : 1; + uint16_t paddingTop : 1; + uint16_t paddingBottom : 1; + uint16_t paddingLeft : 1; + uint16_t paddingRight : 1; + + CssPropertyFlags() + : textAlign(0), + fontStyle(0), + fontWeight(0), + textDecoration(0), + textIndent(0), + marginTop(0), + marginBottom(0), + marginLeft(0), + marginRight(0), + paddingTop(0), + paddingBottom(0), + paddingLeft(0), + paddingRight(0) {} + + [[nodiscard]] bool anySet() const { + return textAlign || fontStyle || fontWeight || textDecoration || textIndent || marginTop || marginBottom || + marginLeft || marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight; + } + + void clearAll() { + textAlign = fontStyle = fontWeight = textDecoration = textIndent = 0; + marginTop = marginBottom = marginLeft = marginRight = 0; + paddingTop = paddingBottom = paddingLeft = paddingRight = 0; + } +}; + +// Represents a collection of CSS style 
properties +// Only stores properties relevant to e-ink text rendering +// Length values are stored as CssLength (value + unit) for deferred resolution +struct CssStyle { + CssTextAlign textAlign = CssTextAlign::Left; + CssFontStyle fontStyle = CssFontStyle::Normal; + CssFontWeight fontWeight = CssFontWeight::Normal; + CssTextDecoration textDecoration = CssTextDecoration::None; + + CssLength textIndent; // First-line indent (deferred resolution) + CssLength marginTop; // Vertical spacing before block + CssLength marginBottom; // Vertical spacing after block + CssLength marginLeft; // Horizontal spacing left of block + CssLength marginRight; // Horizontal spacing right of block + CssLength paddingTop; // Padding before + CssLength paddingBottom; // Padding after + CssLength paddingLeft; // Padding left + CssLength paddingRight; // Padding right + + CssPropertyFlags defined; // Tracks which properties were explicitly set + + // Apply properties from another style, only overwriting if the other style + // has that property explicitly defined + void applyOver(const CssStyle& base) { + if (base.hasTextAlign()) { + textAlign = base.textAlign; + defined.textAlign = 1; + } + if (base.hasFontStyle()) { + fontStyle = base.fontStyle; + defined.fontStyle = 1; + } + if (base.hasFontWeight()) { + fontWeight = base.fontWeight; + defined.fontWeight = 1; + } + if (base.hasTextDecoration()) { + textDecoration = base.textDecoration; + defined.textDecoration = 1; + } + if (base.hasTextIndent()) { + textIndent = base.textIndent; + defined.textIndent = 1; + } + if (base.hasMarginTop()) { + marginTop = base.marginTop; + defined.marginTop = 1; + } + if (base.hasMarginBottom()) { + marginBottom = base.marginBottom; + defined.marginBottom = 1; + } + if (base.hasMarginLeft()) { + marginLeft = base.marginLeft; + defined.marginLeft = 1; + } + if (base.hasMarginRight()) { + marginRight = base.marginRight; + defined.marginRight = 1; + } + if (base.hasPaddingTop()) { + paddingTop = 
base.paddingTop; + defined.paddingTop = 1; + } + if (base.hasPaddingBottom()) { + paddingBottom = base.paddingBottom; + defined.paddingBottom = 1; + } + if (base.hasPaddingLeft()) { + paddingLeft = base.paddingLeft; + defined.paddingLeft = 1; + } + if (base.hasPaddingRight()) { + paddingRight = base.paddingRight; + defined.paddingRight = 1; + } + } + + [[nodiscard]] bool hasTextAlign() const { return defined.textAlign; } + [[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; } + [[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; } + [[nodiscard]] bool hasTextDecoration() const { return defined.textDecoration; } + [[nodiscard]] bool hasTextIndent() const { return defined.textIndent; } + [[nodiscard]] bool hasMarginTop() const { return defined.marginTop; } + [[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; } + [[nodiscard]] bool hasMarginLeft() const { return defined.marginLeft; } + [[nodiscard]] bool hasMarginRight() const { return defined.marginRight; } + [[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; } + [[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; } + [[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; } + [[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; } + + void reset() { + textAlign = CssTextAlign::Left; + fontStyle = CssFontStyle::Normal; + fontWeight = CssFontWeight::Normal; + textDecoration = CssTextDecoration::None; + textIndent = CssLength{}; + marginTop = marginBottom = marginLeft = marginRight = CssLength{}; + paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{}; + defined.clearAll(); + } +}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index ac1f537f..ab93d9cb 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -22,6 +22,9 @@ constexpr int 
NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]); const char* ITALIC_TAGS[] = {"i", "em"}; constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); +const char* UNDERLINE_TAGS[] = {"u", "ins"}; +constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]); + const char* IMAGE_TAGS[] = {"img"}; constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); @@ -40,17 +43,51 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +bool isHeaderOrBlock(const char* name) { + return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); +} + +// Update effective bold/italic/underline based on block style and inline style stack +void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { + // Start with block-level styles + effectiveBold = currentCssStyle.hasFontWeight() && currentCssStyle.fontWeight == CssFontWeight::Bold; + effectiveItalic = currentCssStyle.hasFontStyle() && currentCssStyle.fontStyle == CssFontStyle::Italic; + effectiveUnderline = + currentCssStyle.hasTextDecoration() && currentCssStyle.textDecoration == CssTextDecoration::Underline; + + // Apply inline style stack in order + for (const auto& entry : inlineStyleStack) { + if (entry.hasBold) { + effectiveBold = entry.bold; + } + if (entry.hasItalic) { + effectiveItalic = entry.italic; + } + if (entry.hasUnderline) { + effectiveUnderline = entry.underline; + } + } +} + // flush the contents of partWordBuffer to currentTextBlock void ChapterHtmlSlimParser::flushPartWordBuffer() { - // determine font style + // Determine font style from depth-based tracking and CSS effective style + const bool isBold = boldUntilDepth < depth || effectiveBold; + const bool isItalic = italicUntilDepth < depth || effectiveItalic; + const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline; + + // Combine style flags using bitwise OR EpdFontFamily::Style fontStyle = 
EpdFontFamily::REGULAR; - if (boldUntilDepth < depth && italicUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (boldUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (italicUntilDepth < depth) { - fontStyle = EpdFontFamily::ITALIC; + if (isBold) { + fontStyle = static_cast(fontStyle | EpdFontFamily::BOLD); } + if (isItalic) { + fontStyle = static_cast(fontStyle | EpdFontFamily::ITALIC); + } + if (isUnderline) { + fontStyle = static_cast(fontStyle | EpdFontFamily::UNDERLINE); + } + // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; currentTextBlock->addWord(partWordBuffer, fontStyle); @@ -58,17 +95,20 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() { } // start a new text block if needed -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { +void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { - currentTextBlock->setStyle(style); + // Merge with existing block style to accumulate CSS styling from parent block elements. + // This handles cases like

text

where the + // div's margin should be preserved, even though it has no direct text content. + currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle)); return; } makePages(); } - currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); + currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle)); } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { @@ -80,13 +120,30 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* return; } + // Extract class and style attributes for CSS processing + std::string classAttr; + std::string styleAttr; + if (atts != nullptr) { + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "class") == 0) { + classAttr = atts[i + 1]; + } else if (strcmp(atts[i], "style") == 0) { + styleAttr = atts[i + 1]; + } + } + } + + auto centeredBlockStyle = BlockStyle(); + centeredBlockStyle.textAlignDefined = true; + centeredBlockStyle.alignment = CssTextAlign::Center; + // Special handling for tables - show placeholder text instead of dropping silently if (strcmp(name, "table") == 0) { // Add placeholder text - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, "[Table omitted]", strlen("[Table omitted]")); @@ -111,9 +168,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = 
min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, alt.c_str(), alt.length()); @@ -141,43 +198,113 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } - if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - self->startNewTextBlock(TextBlock::CENTER_ALIGN); - self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); - self->depth += 1; - return; + // Compute CSS style for this element + CssStyle cssStyle; + if (self->cssParser) { + // Get combined tag + class styles + cssStyle = self->cssParser->resolveStyle(name, classAttr); + // Merge inline style (highest priority) + if (!styleAttr.empty()) { + CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr); + cssStyle.applyOver(inlineStyle); + } } - if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { + const float emSize = static_cast(self->renderer.getLineHeight(self->fontId)) * self->lineCompression; + const auto userAlignment = static_cast(self->paragraphAlignment); + + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); + self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + self->updateEffectiveInlineStyle(); + } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { if (self->partWordBufferIndex > 0) { // flush word preceding
to currentTextBlock before calling startNewTextBlock self->flushPartWordBuffer(); } - self->startNewTextBlock(self->currentTextBlock->getStyle()); - self->depth += 1; - return; + self->startNewTextBlock(self->currentTextBlock->getBlockStyle()); + } else { + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); + self->updateEffectiveInlineStyle(); + + if (strcmp(name, "li") == 0) { + self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); + } } - - self->startNewTextBlock(static_cast(self->paragraphAlignment)); - if (strcmp(name, "li") == 0) { - self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); + } else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) { + self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth); + // Push inline style entry for underline tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasUnderline = true; + entry.underline = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; } - - self->depth += 1; - return; - } - - if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); - self->depth += 1; - return; - } - - if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { + // Push inline style entry for bold tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasBold = true; + entry.bold = true; + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) 
{ + entry.hasUnderline = true; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); - self->depth += 1; - return; + // Push inline style entry for italic tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasItalic = true; + entry.italic = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (strcmp(name, "span") == 0 || !isHeaderOrBlock(name)) { + // Handle span and other inline elements for CSS styling + if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) { + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } } // Unprocessed tag, just increasing depth and continue forward @@ -239,17 +366,27 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { auto* self = static_cast(userData); - if (self->partWordBufferIndex > 0) { - 
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file. - // We don't want to flush out content when closing inline tags like . - // Currently this also flushes out on closing and tags, but they are line tags so that shouldn't happen, - // text styling needs to be overhauled to fix it. - const bool shouldBreakText = - matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || - strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; + // Check if any style state will change after we decrement depth + // If so, we MUST flush the partWordBuffer with the CURRENT style first + // Note: depth hasn't been decremented yet, so we check against (depth - 1) + const bool willPopStyleStack = + !self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1; + const bool willClearBold = self->boldUntilDepth == self->depth - 1; + const bool willClearItalic = self->italicUntilDepth == self->depth - 1; + const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; - if (shouldBreakText) { + const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; + const bool headerOrBlockTag = isHeaderOrBlock(name); + + // Flush buffer with current style BEFORE any style changes + if (self->partWordBufferIndex > 0) { + // Flush if style will change OR if we're closing a block/structural element + const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || + matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 || + matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; + + if (shouldFlush) { self->flushPartWordBuffer(); } } @@ -261,19 +398,40 @@ void XMLCALL 
ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n self->skipUntilDepth = INT_MAX; } - // Leaving bold + // Leaving bold tag if (self->boldUntilDepth == self->depth) { self->boldUntilDepth = INT_MAX; } - // Leaving italic + // Leaving italic tag if (self->italicUntilDepth == self->depth) { self->italicUntilDepth = INT_MAX; } + + // Leaving underline tag + if (self->underlineUntilDepth == self->depth) { + self->underlineUntilDepth = INT_MAX; + } + + // Pop from inline style stack if we pushed an entry at this depth + // This handles all inline elements: b, i, u, span, etc. + if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) { + self->inlineStyleStack.pop_back(); + self->updateEffectiveInlineStyle(); + } + + // Clear block style when leaving header or block elements + if (headerOrBlockTag) { + self->currentCssStyle.reset(); + self->updateEffectiveInlineStyle(); + } } bool ChapterHtmlSlimParser::parseAndBuildPages() { - startNewTextBlock((TextBlock::Style)this->paragraphAlignment); + auto paragraphAlignmentBlockStyle = BlockStyle(); + paragraphAlignmentBlockStyle.textAlignDefined = true; + paragraphAlignmentBlockStyle.alignment = static_cast(this->paragraphAlignment); + startNewTextBlock(paragraphAlignmentBlockStyle); const XML_Parser parser = XML_ParserCreate(nullptr); int done; @@ -362,7 +520,9 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line) { currentPageNextY = 0; } - currentPage->elements.push_back(std::make_shared(line, 0, currentPageNextY)); + // Apply horizontal left inset (margin + padding) as x position offset + const int16_t xOffset = line->getBlockStyle().leftInset(); + currentPage->elements.push_back(std::make_shared(line, xOffset, currentPageNextY)); currentPageNextY += lineHeight; } @@ -378,10 +538,34 @@ void ChapterHtmlSlimParser::makePages() { } const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; + + // Apply top spacing before the paragraph (stored in 
pixels) + const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); + if (blockStyle.marginTop > 0) { + currentPageNextY += blockStyle.marginTop; + } + if (blockStyle.paddingTop > 0) { + currentPageNextY += blockStyle.paddingTop; + } + + // Calculate effective width accounting for horizontal margins/padding + const int horizontalInset = blockStyle.totalHorizontalInset(); + const uint16_t effectiveWidth = + (horizontalInset < viewportWidth) ? static_cast(viewportWidth - horizontalInset) : viewportWidth; + currentTextBlock->layoutAndExtractLines( - renderer, fontId, viewportWidth, + renderer, fontId, effectiveWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Extra paragraph spacing if enabled + + // Apply bottom spacing after the paragraph (stored in pixels) + if (blockStyle.marginBottom > 0) { + currentPageNextY += blockStyle.marginBottom; + } + if (blockStyle.paddingBottom > 0) { + currentPageNextY += blockStyle.paddingBottom; + } + + // Extra paragraph spacing if enabled (default behavior) if (extraParagraphSpacing) { currentPageNextY += lineHeight / 2; } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 38202e6e..92a9838a 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -8,6 +8,8 @@ #include "../ParsedText.h" #include "../blocks/TextBlock.h" +#include "../css/CssParser.h" +#include "../css/CssStyle.h" class Page; class GfxRenderer; @@ -23,6 +25,7 @@ class ChapterHtmlSlimParser { int skipUntilDepth = INT_MAX; int boldUntilDepth = INT_MAX; int italicUntilDepth = INT_MAX; + int underlineUntilDepth = INT_MAX; // buffer for building up words from characters, will auto break if longer than this // leave one char at end for null pointer char partWordBuffer[MAX_WORD_SIZE + 1] = {}; @@ -37,8 +40,23 @@ class ChapterHtmlSlimParser { uint16_t viewportWidth; uint16_t viewportHeight; bool hyphenationEnabled; 
+ const CssParser* cssParser; - void startNewTextBlock(TextBlock::Style style); + // Style tracking (replaces depth-based approach) + struct StyleStackEntry { + int depth = 0; + bool hasBold = false, bold = false; + bool hasItalic = false, italic = false; + bool hasUnderline = false, underline = false; + }; + std::vector inlineStyleStack; + CssStyle currentCssStyle; + bool effectiveBold = false; + bool effectiveItalic = false; + bool effectiveUnderline = false; + + void updateEffectiveInlineStyle(); + void startNewTextBlock(const BlockStyle& blockStyle); void flushPartWordBuffer(); void makePages(); // XML callbacks @@ -52,7 +70,8 @@ class ChapterHtmlSlimParser { const uint8_t paragraphAlignment, const uint16_t viewportWidth, const uint16_t viewportHeight, const bool hyphenationEnabled, const std::function)>& completePageFn, - const std::function& popupFn = nullptr) + const std::function& popupFn = nullptr, const CssParser* cssParser = nullptr) + : filepath(filepath), renderer(renderer), fontId(fontId), @@ -63,7 +82,9 @@ class ChapterHtmlSlimParser { viewportHeight(viewportHeight), hyphenationEnabled(hyphenationEnabled), completePageFn(completePageFn), - popupFn(popupFn) {} + popupFn(popupFn), + cssParser(cssParser) {} + ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages(); void addLineToPage(std::shared_ptr line); diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index ce0e22ea..c6cdec4e 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -8,6 +8,7 @@ namespace { constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml"; +constexpr char MEDIA_TYPE_CSS[] = "text/css"; constexpr char itemCacheFile[] = "/.items.bin"; } // namespace @@ -218,6 +219,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name } } + // Collect CSS files + if (mediaType == MEDIA_TYPE_CSS) { + self->cssFiles.push_back(href); + } + // EPUB 3: Check 
for nav document (properties contains "nav") if (!properties.empty() && self->tocNavPath.empty()) { // Properties is space-separated, check if "nav" is present as a word diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index b40a3787..1253eae3 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -64,6 +64,7 @@ class ContentOpfParser final : public Print { std::string tocNavPath; // EPUB 3 nav document path std::string coverItemHref; std::string textReferenceHref; + std::vector cssFiles; // CSS stylesheet paths explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache) diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index b5aa7710..40caf18c 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -470,6 +470,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const { return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX; } +int GfxRenderer::getTextAdvanceX(const int fontId, const char* text) const { + if (fontMap.count(fontId) == 0) { + Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); + return 0; + } + + uint32_t cp; + int width = 0; + while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + width += fontMap.at(fontId).getGlyph(cp, EpdFontFamily::REGULAR)->advanceX; + } + return width; +} + int GfxRenderer::getFontAscenderSize(const int fontId) const { if (fontMap.count(fontId) == 0) { Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); diff --git a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h index 86ddc8fc..66d625f5 100644 --- a/lib/GfxRenderer/GfxRenderer.h +++ b/lib/GfxRenderer/GfxRenderer.h @@ -78,6 +78,7 @@ class GfxRenderer { void drawText(int fontId, int x, int y, const char* text, bool black = true, EpdFontFamily::Style style = 
EpdFontFamily::REGULAR) const; int getSpaceWidth(int fontId) const; + int getTextAdvanceX(int fontId, const char* text) const; int getFontAscenderSize(int fontId) const; int getLineHeight(int fontId) const; std::string truncatedText(int fontId, const char* text, int maxWidth, diff --git a/src/activities/boot_sleep/SleepActivity.cpp b/src/activities/boot_sleep/SleepActivity.cpp index 7ffc5851..39460435 100644 --- a/src/activities/boot_sleep/SleepActivity.cpp +++ b/src/activities/boot_sleep/SleepActivity.cpp @@ -238,7 +238,8 @@ void SleepActivity::renderCoverSleepScreen() const { } else if (StringUtils::checkFileExtension(APP_STATE.openEpubPath, ".epub")) { // Handle EPUB file Epub lastEpub(APP_STATE.openEpubPath, "/.crosspoint"); - if (!lastEpub.load()) { + // Skip loading css since we only need metadata here + if (!lastEpub.load(true, true)) { Serial.println("[SLP] Failed to load last epub"); return renderDefaultSleepScreen(); } diff --git a/src/activities/home/HomeActivity.cpp b/src/activities/home/HomeActivity.cpp index 678af7cb..a6aa443b 100644 --- a/src/activities/home/HomeActivity.cpp +++ b/src/activities/home/HomeActivity.cpp @@ -52,7 +52,8 @@ void HomeActivity::onEnter() { // If epub, try to load the metadata for title/author and cover if (StringUtils::checkFileExtension(lastBookTitle, ".epub")) { Epub epub(APP_STATE.openEpubPath, "/.crosspoint"); - epub.load(false); + // Skip loading css since we only need metadata here + epub.load(false, true); if (!epub.getTitle().empty()) { lastBookTitle = std::string(epub.getTitle()); }