diff --git a/lib/EpdFont/EpdFontFamily.cpp b/lib/EpdFont/EpdFontFamily.cpp index 74a6677f..821153e3 100644 --- a/lib/EpdFont/EpdFontFamily.cpp +++ b/lib/EpdFont/EpdFontFamily.cpp @@ -1,23 +1,19 @@ #include "EpdFontFamily.h" const EpdFont* EpdFontFamily::getFont(const Style style) const { - if (style == BOLD && bold) { + // Extract font style bits (ignore UNDERLINE bit for font selection) + const bool hasBold = (style & BOLD) != 0; + const bool hasItalic = (style & ITALIC) != 0; + + if (hasBold && hasItalic) { + if (boldItalic) return boldItalic; + if (bold) return bold; + if (italic) return italic; + } else if (hasBold && bold) { return bold; - } - if (style == ITALIC && italic) { + } else if (hasItalic && italic) { return italic; } - if (style == BOLD_ITALIC) { - if (boldItalic) { - return boldItalic; - } - if (bold) { - return bold; - } - if (italic) { - return italic; - } - } return regular; } diff --git a/lib/EpdFont/EpdFontFamily.h b/lib/EpdFont/EpdFontFamily.h index 92043d1f..64fd9953 100644 --- a/lib/EpdFont/EpdFontFamily.h +++ b/lib/EpdFont/EpdFontFamily.h @@ -3,7 +3,7 @@ class EpdFontFamily { public: - enum Style : uint8_t { REGULAR = 0, BOLD = 1, ITALIC = 2, BOLD_ITALIC = 3 }; + enum Style : uint8_t { REGULAR = 0, BOLD = 1, ITALIC = 2, BOLD_ITALIC = 3, UNDERLINE = 4 }; explicit EpdFontFamily(const EpdFont* regular, const EpdFont* bold = nullptr, const EpdFont* italic = nullptr, const EpdFont* boldItalic = nullptr) diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 7559e3b3..5dc02358 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -86,6 +86,10 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { tocNavItem = opfParser.tocNavPath; } + if (!opfParser.cssFiles.empty()) { + cssFiles = opfParser.cssFiles; + } + Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; } @@ -204,15 +208,91 @@ bool Epub::parseTocNavFile() const { return true; } +std::string Epub::getCssRulesCache() const { 
return cachePath + "/css_rules.cache"; } + +bool Epub::loadCssRulesFromCache() const { + FsFile cssCacheFile; + if (SdMan.openFileForRead("EBP", getCssRulesCache(), cssCacheFile)) { + if (cssParser->loadFromCache(cssCacheFile)) { + cssCacheFile.close(); + Serial.printf("[%lu] [EBP] Loaded CSS rules from cache\n", millis()); + return true; + } + cssCacheFile.close(); + Serial.printf("[%lu] [EBP] CSS cache invalid, reparsing\n", millis()); + } + return false; +} + +void Epub::parseCssFiles() const { + if (cssFiles.empty()) { + Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis()); + } + + // Try to load from CSS cache first + if (!loadCssRulesFromCache()) { + // Cache miss - parse CSS files + for (const auto& cssPath : cssFiles) { + Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str()); + + // Extract CSS file to temp location + const auto tmpCssPath = getCachePath() + "/.tmp.css"; + FsFile tempCssFile; + if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis()); + continue; + } + if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) { + Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str()); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + tempCssFile.close(); + + // Parse the CSS file + if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis()); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + cssParser->loadFromStream(tempCssFile); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + } + + // Save to cache for next time + FsFile cssCacheFile; + if (SdMan.openFileForWrite("EBP", getCssRulesCache(), cssCacheFile)) { + cssParser->saveToCache(cssCacheFile); + cssCacheFile.close(); + } + + Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu 
files\n", millis(), cssParser->ruleCount(), + cssFiles.size()); + } +} + // load in the meta data for the epub file -bool Epub::load(const bool buildIfMissing) { +bool Epub::load(const bool buildIfMissing, const bool skipLoadingCss) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); // Initialize spine/TOC cache bookMetadataCache.reset(new BookMetadataCache(cachePath)); + // Always create CssParser - needed for inline style parsing even without CSS files + cssParser.reset(new CssParser()); // Try to load existing cache first if (bookMetadataCache->load()) { + if (!skipLoadingCss && !loadCssRulesFromCache()) { + Serial.printf("[%lu] [EBP] Warning: CSS rules cache not found, attempting to parse CSS files\n", millis()); + // to get CSS file list + if (!parseContentOpf(bookMetadataCache->coreMetadata)) { + Serial.printf("[%lu] [EBP] Could not parse content.opf from cached bookMetadata for CSS files\n", millis()); + // continue anyway - book will work without CSS and we'll still load any inline style CSS + } + parseCssFiles(); + } Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } @@ -309,6 +389,11 @@ bool Epub::load(const bool buildIfMissing) { return false; } + if (!skipLoadingCss) { + // Parse CSS files after cache reload + parseCssFiles(); + } + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 7a21efd5..85a601a5 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -8,6 +8,7 @@ #include #include "Epub/BookMetadataCache.h" +#include "Epub/css/CssParser.h" class ZipFile; @@ -24,11 +25,18 @@ class Epub { std::string cachePath; // Spine and TOC cache std::unique_ptr bookMetadataCache; + // CSS parser for styling + std::unique_ptr cssParser; + // CSS files + std::vector cssFiles; bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool 
parseTocNcxFile() const; bool parseTocNavFile() const; + void parseCssFiles() const; + std::string getCssRulesCache() const; + bool loadCssRulesFromCache() const; public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -37,7 +45,7 @@ class Epub { } ~Epub() = default; std::string& getBasePath() { return contentBasePath; } - bool load(bool buildIfMissing = true); + bool load(bool buildIfMissing = true, bool skipLoadingCss = false); bool clearCache() const; void setupCacheDir() const; const std::string& getCachePath() const; @@ -64,4 +72,5 @@ class Epub { size_t getBookSize() const; float calculateProgress(int currentSpineIndex, float currentSpineRead) const; + const CssParser* getCssParser() const { return cssParser.get(); } }; diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 81d688ec..aca85581 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -49,11 +49,15 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s } // namespace -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style style, const bool underline) { if (word.empty()) return; words.push_back(std::move(word)); - wordStyles.push_back(fontStyle); + EpdFontFamily::Style combinedStyle = style; + if (underline) { + combinedStyle = static_cast(combinedStyle | EpdFontFamily::UNDERLINE); + } + wordStyles.push_back(combinedStyle); } // Consumes data to minimize memory usage @@ -109,10 +113,19 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c return {}; } + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? 
blockStyle.textIndent + : 0; + // Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation. for (size_t i = 0; i < wordWidths.size(); ++i) { - while (wordWidths[i] > pageWidth) { - if (!hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { + // First word needs to fit in reduced width if there's an indent + const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; + while (wordWidths[i] > effectiveWidth) { + if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { break; } } @@ -133,11 +146,14 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c int currlen = -spaceWidth; dp[i] = MAX_COST; + // First line has reduced width due to text-indent + const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; + for (size_t j = i; j < totalWordCount; ++j) { // Current line length: previous width + space + current word width currlen += wordWidths[j] + spaceWidth; - if (currlen > pageWidth) { + if (currlen > effectivePageWidth) { break; } @@ -145,7 +161,7 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c if (j == totalWordCount - 1) { cost = 0; // Last line } else { - const int remainingSpace = pageWidth - currlen; + const int remainingSpace = effectivePageWidth - currlen; // Use long long for the square to prevent overflow const long long cost_ll = static_cast(remainingSpace) * remainingSpace + dp[j + 1]; @@ -200,7 +216,11 @@ void ParsedText::applyParagraphIndent() { return; } - if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) { + if (blockStyle.textIndentDefined) { + // CSS text-indent is explicitly set (even if 0) - don't use fallback EmSpace + // The actual indent positioning is handled in extractLine() + } else if (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) { + // No CSS 
text-indent defined - use EmSpace fallback for visual indent words.front().insert(0, "\xe2\x80\x83"); } } @@ -209,13 +229,24 @@ void ParsedText::applyParagraphIndent() { std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth, const int spaceWidth, std::vector& wordWidths) { + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; + std::vector lineBreakIndices; size_t currentIndex = 0; + bool isFirstLine = true; while (currentIndex < wordWidths.size()) { const size_t lineStart = currentIndex; int lineWidth = 0; + // First line has reduced width due to text-indent + const int effectivePageWidth = isFirstLine ? pageWidth - firstLineIndent : pageWidth; + // Consume as many words as possible for current line, splitting when prefixes fit while (currentIndex < wordWidths.size()) { const bool isFirstWord = currentIndex == lineStart; @@ -223,14 +254,14 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r const int candidateWidth = spacing + wordWidths[currentIndex]; // Word fits on current line - if (lineWidth + candidateWidth <= pageWidth) { + if (lineWidth + candidateWidth <= effectivePageWidth) { lineWidth += candidateWidth; ++currentIndex; continue; } // Word would overflow — try to split based on hyphenation points - const int availableWidth = pageWidth - lineWidth - spacing; + const int availableWidth = effectivePageWidth - lineWidth - spacing; const bool allowFallbackBreaks = isFirstWord; // Only for first word on line if (availableWidth > 0 && @@ -250,6 +281,7 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r } lineBreakIndices.push_back(currentIndex); + isFirstLine = false; } return lineBreakIndices; @@ 
-334,27 +366,36 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lineWordCount = lineBreak - lastBreakAt; + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const bool isFirstLine = breakIndex == 0; + const int firstLineIndent = + isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; + // Calculate total word width for this line int lineWordWidthSum = 0; for (size_t i = lastBreakAt; i < lineBreak; i++) { lineWordWidthSum += wordWidths[i]; } - // Calculate spacing - const int spareSpace = pageWidth - lineWordWidthSum; + // Calculate spacing (account for indent reducing effective page width on first line) + const int effectivePageWidth = pageWidth - firstLineIndent; + const int spareSpace = effectivePageWidth - lineWordWidthSum; int spacing = spaceWidth; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { + if (blockStyle.alignment == CssTextAlign::Justify && !isLastLine && lineWordCount >= 2) { spacing = spareSpace / (lineWordCount - 1); } - // Calculate initial x position - uint16_t xpos = 0; - if (style == TextBlock::RIGHT_ALIGN) { + // Calculate initial x position (first line starts at indent for left/justified text) + auto xpos = static_cast(firstLineIndent); + if (blockStyle.alignment == CssTextAlign::Right) { xpos = spareSpace - (lineWordCount - 1) * spaceWidth; - } else if (style == TextBlock::CENTER_ALIGN) { + } else if (blockStyle.alignment == CssTextAlign::Center) { xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; } @@ -384,5 +425,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const } } - 
processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); -} \ No newline at end of file + processLine( + std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle)); +} diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index e72db7ef..a13d13b5 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -8,6 +8,7 @@ #include #include +#include "blocks/BlockStyle.h" #include "blocks/TextBlock.h" class GfxRenderer; @@ -15,7 +16,7 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; - TextBlock::Style style; + BlockStyle blockStyle; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -32,14 +33,14 @@ class ParsedText { std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: - explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, - const bool hyphenationEnabled = false) - : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} + explicit ParsedText(const bool extraParagraphSpacing, const bool hyphenationEnabled = false, + const BlockStyle& blockStyle = BlockStyle()) + : blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; - void addWord(std::string word, EpdFontFamily::Style fontStyle); - void setStyle(const TextBlock::Style style) { this->style = style; } - TextBlock::Style getStyle() const { return style; } + void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } + BlockStyle& getBlockStyle() { return blockStyle; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, diff --git 
a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index cf67108b..9cb70027 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -8,7 +8,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 10; +constexpr uint8_t SECTION_FILE_VERSION = 11; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(uint32_t); @@ -179,7 +179,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, - [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, popupFn); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, popupFn, + epub->getCssParser()); Hyphenator::setPreferredLanguage(epub->getLanguage()); success = visitor.parseAndBuildPages(); diff --git a/lib/Epub/Epub/blocks/BlockStyle.h b/lib/Epub/Epub/blocks/BlockStyle.h new file mode 100644 index 00000000..5c26a21d --- /dev/null +++ b/lib/Epub/Epub/blocks/BlockStyle.h @@ -0,0 +1,90 @@ +#pragma once + +#include + +#include "Epub/css/CssStyle.h" + +/** + * BlockStyle - Block-level styling properties + */ +struct BlockStyle { + CssTextAlign alignment = CssTextAlign::Justify; + + // Spacing (in pixels) + int16_t marginTop = 0; + int16_t marginBottom = 0; + int16_t marginLeft = 0; + int16_t marginRight = 0; + int16_t paddingTop = 0; // treated same as margin for rendering + int16_t paddingBottom = 0; // treated same as margin for rendering + int16_t paddingLeft = 0; // treated same as margin for rendering + int16_t paddingRight = 0; // treated same as margin for rendering + int16_t textIndent = 0; + bool textIndentDefined = false; // true if 
text-indent was explicitly set in CSS + bool textAlignDefined = false; // true if text-align was explicitly set in CSS + + // Combined horizontal insets (margin + padding) + [[nodiscard]] int16_t leftInset() const { return marginLeft + paddingLeft; } + [[nodiscard]] int16_t rightInset() const { return marginRight + paddingRight; } + [[nodiscard]] int16_t totalHorizontalInset() const { return leftInset() + rightInset(); } + + // Combine with another block style. Useful for parent -> child styles, where the child style should be + // applied on top of the parent's style to get the combined style. + BlockStyle getCombinedBlockStyle(const BlockStyle& child) const { + BlockStyle combinedBlockStyle; + + combinedBlockStyle.marginTop = static_cast(child.marginTop + marginTop); + combinedBlockStyle.marginBottom = static_cast(child.marginBottom + marginBottom); + combinedBlockStyle.marginLeft = static_cast(child.marginLeft + marginLeft); + combinedBlockStyle.marginRight = static_cast(child.marginRight + marginRight); + + combinedBlockStyle.paddingTop = static_cast(child.paddingTop + paddingTop); + combinedBlockStyle.paddingBottom = static_cast(child.paddingBottom + paddingBottom); + combinedBlockStyle.paddingLeft = static_cast(child.paddingLeft + paddingLeft); + combinedBlockStyle.paddingRight = static_cast(child.paddingRight + paddingRight); + // Text indent: use child's if defined + if (child.textIndentDefined) { + combinedBlockStyle.textIndent = child.textIndent; + combinedBlockStyle.textIndentDefined = true; + } else { + combinedBlockStyle.textIndent = textIndent; + combinedBlockStyle.textIndentDefined = textIndentDefined; + } + // Text align: use child's if defined + if (child.textAlignDefined) { + combinedBlockStyle.alignment = child.alignment; + combinedBlockStyle.textAlignDefined = true; + } else { + combinedBlockStyle.alignment = alignment; + combinedBlockStyle.textAlignDefined = textAlignDefined; + } + return combinedBlockStyle; + } + + // Create a BlockStyle from 
CSS style properties, resolving CssLength values to pixels + // emSize is the current font line height, used for em/rem unit conversion + // paragraphAlignment is the user's paragraphAlignment setting preference + static BlockStyle fromCssStyle(const CssStyle& cssStyle, const float emSize, const CssTextAlign paragraphAlignment) { + BlockStyle blockStyle; + // Resolve all CssLength values to pixels using the current font's em size + blockStyle.marginTop = cssStyle.marginTop.toPixelsInt16(emSize); + blockStyle.marginBottom = cssStyle.marginBottom.toPixelsInt16(emSize); + blockStyle.marginLeft = cssStyle.marginLeft.toPixelsInt16(emSize); + blockStyle.marginRight = cssStyle.marginRight.toPixelsInt16(emSize); + + blockStyle.paddingTop = cssStyle.paddingTop.toPixelsInt16(emSize); + blockStyle.paddingBottom = cssStyle.paddingBottom.toPixelsInt16(emSize); + blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); + blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); + + blockStyle.textIndent = cssStyle.textIndent.toPixelsInt16(emSize); + blockStyle.textIndentDefined = cssStyle.hasTextIndent(); + blockStyle.textAlignDefined = cssStyle.hasTextAlign(); + if (blockStyle.textAlignDefined) { + blockStyle.alignment = cssStyle.textAlign; + } else { + blockStyle.alignment = paragraphAlignment; + } + return blockStyle; + } +}; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 2a15aef0..3ab25558 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -14,9 +14,32 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); - for (size_t i = 0; i < words.size(); i++) { - renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt); + const int wordX = *wordXposIt + x; + const EpdFontFamily::Style currentStyle = *wordStylesIt; + 
renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle); + + if ((currentStyle & EpdFontFamily::UNDERLINE) != 0) { + const std::string& w = *wordIt; + const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle); + // y is the top of the text line; add ascender to reach baseline, then offset 2px below + const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; + + int startX = wordX; + int underlineWidth = fullWordWidth; + + // if word starts with em-space ("\xe2\x80\x83"), account for the additional indent before drawing the line + if (w.size() >= 3 && static_cast(w[0]) == 0xE2 && static_cast(w[1]) == 0x80 && + static_cast(w[2]) == 0x83) { + const char* visiblePtr = w.c_str() + 3; + const int prefixWidth = renderer.getTextAdvanceX(fontId, std::string("\xe2\x80\x83").c_str()); + const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, currentStyle); + startX = wordX + prefixWidth; + underlineWidth = visibleWidth; + } + + renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true); + } std::advance(wordIt, 1); std::advance(wordStylesIt, 1); @@ -37,8 +60,19 @@ bool TextBlock::serialize(FsFile& file) const { for (auto x : wordXpos) serialization::writePod(file, x); for (auto s : wordStyles) serialization::writePod(file, s); - // Block style - serialization::writePod(file, style); + // Style (alignment + margins/padding/indent) + serialization::writePod(file, blockStyle.alignment); + serialization::writePod(file, blockStyle.textAlignDefined); + serialization::writePod(file, blockStyle.marginTop); + serialization::writePod(file, blockStyle.marginBottom); + serialization::writePod(file, blockStyle.marginLeft); + serialization::writePod(file, blockStyle.marginRight); + serialization::writePod(file, blockStyle.paddingTop); + serialization::writePod(file, blockStyle.paddingBottom); + serialization::writePod(file, blockStyle.paddingLeft); + serialization::writePod(file, blockStyle.paddingRight); + 
serialization::writePod(file, blockStyle.textIndent); + serialization::writePod(file, blockStyle.textIndentDefined); return true; } @@ -48,7 +82,7 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { std::list words; std::list wordXpos; std::list wordStyles; - Style style; + BlockStyle blockStyle; // Word count serialization::readPod(file, wc); @@ -67,8 +101,20 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& s : wordStyles) serialization::readPod(file, s); - // Block style - serialization::readPod(file, style); + // Style (alignment + margins/padding/indent) + serialization::readPod(file, blockStyle.alignment); + serialization::readPod(file, blockStyle.textAlignDefined); + serialization::readPod(file, blockStyle.marginTop); + serialization::readPod(file, blockStyle.marginBottom); + serialization::readPod(file, blockStyle.marginLeft); + serialization::readPod(file, blockStyle.marginRight); + serialization::readPod(file, blockStyle.paddingTop); + serialization::readPod(file, blockStyle.paddingBottom); + serialization::readPod(file, blockStyle.paddingLeft); + serialization::readPod(file, blockStyle.paddingRight); + serialization::readPod(file, blockStyle.textIndent); + serialization::readPod(file, blockStyle.textIndentDefined); - return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style)); + return std::unique_ptr( + new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), blockStyle)); } diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 415a18f3..e233f77f 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -7,30 +7,26 @@ #include #include "Block.h" +#include "BlockStyle.h" // Represents a line of text on a page class TextBlock final : public Block { - public: - enum Style : uint8_t { - JUSTIFIED = 0, - LEFT_ALIGN = 1, - CENTER_ALIGN = 2, - 
RIGHT_ALIGN = 3, - }; - private: std::list words; std::list wordXpos; std::list wordStyles; - Style style; + BlockStyle blockStyle; public: explicit TextBlock(std::list words, std::list word_xpos, - std::list word_styles, const Style style) - : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {} + std::list word_styles, const BlockStyle& blockStyle = BlockStyle()) + : words(std::move(words)), + wordXpos(std::move(word_xpos)), + wordStyles(std::move(word_styles)), + blockStyle(blockStyle) {} ~TextBlock() override = default; - void setStyle(const Style style) { this->style = style; } - Style getStyle() const { return style; } + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } + const BlockStyle& getBlockStyle() const { return blockStyle; } bool isEmpty() override { return words.empty(); } void layout(GfxRenderer& renderer) override {}; // given a renderer works out where to break the words into lines diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp new file mode 100644 index 00000000..d51ebba7 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -0,0 +1,697 @@ +#include "CssParser.h" + +#include + +#include +#include + +namespace { + +// Buffer size for reading CSS files +constexpr size_t READ_BUFFER_SIZE = 512; + +// Maximum CSS file size we'll process (prevent memory issues) +constexpr size_t MAX_CSS_SIZE = 64 * 1024; + +// Check if character is CSS whitespace +bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; } + +// Read entire file into string (with size limit) +std::string readFileContent(FsFile& file) { + std::string content; + content.reserve(std::min(static_cast(file.size()), MAX_CSS_SIZE)); + + char buffer[READ_BUFFER_SIZE]; + while (file.available() && content.size() < MAX_CSS_SIZE) { + const int bytesRead = file.read(buffer, sizeof(buffer)); + if (bytesRead <= 0) break; + 
content.append(buffer, bytesRead); + } + return content; +} + +// Remove CSS comments (/* ... */) from content +std::string stripComments(const std::string& css) { + std::string result; + result.reserve(css.size()); + + size_t pos = 0; + while (pos < css.size()) { + // Look for start of comment + if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') { + // Find end of comment + const size_t endPos = css.find("*/", pos + 2); + if (endPos == std::string::npos) { + // Unterminated comment - skip rest of file + break; + } + pos = endPos + 2; + } else { + result.push_back(css[pos]); + ++pos; + } + } + return result; +} + +// Skip @-rules (like @media, @import, @font-face) +// Returns position after the @-rule +size_t skipAtRule(const std::string& css, const size_t start) { + // Find the end - either semicolon (simple @-rule) or matching brace + size_t pos = start + 1; // Skip the '@' + + // Skip identifier + while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) { + ++pos; + } + + // Look for { or ; + int braceDepth = 0; + while (pos < css.size()) { + const char c = css[pos]; + if (c == '{') { + ++braceDepth; + } else if (c == '}') { + --braceDepth; + if (braceDepth == 0) { + return pos + 1; + } + } else if (c == ';' && braceDepth == 0) { + return pos + 1; + } + ++pos; + } + return css.size(); +} + +// Extract next rule from CSS content +// Returns true if a rule was found, with selector and body filled +bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) { + selector.clear(); + body.clear(); + + // Skip whitespace and @-rules until we find a regular rule + while (pos < css.size()) { + // Skip whitespace + while (pos < css.size() && isCssWhitespace(css[pos])) { + ++pos; + } + + if (pos >= css.size()) return false; + + // Handle @-rules iteratively (avoids recursion/stack overflow) + if (css[pos] == '@') { + pos = skipAtRule(css, pos); + continue; // Try again after skipping the @-rule + } + + 
break; // Found start of a regular rule + } + + if (pos >= css.size()) return false; + + // Find opening brace + const size_t bracePos = css.find('{', pos); + if (bracePos == std::string::npos) return false; + + // Extract selector (everything before the brace) + selector = css.substr(pos, bracePos - pos); + + // Find matching closing brace + int depth = 1; + const size_t bodyStart = bracePos + 1; + size_t bodyEnd = bodyStart; + + while (bodyEnd < css.size() && depth > 0) { + if (css[bodyEnd] == '{') + ++depth; + else if (css[bodyEnd] == '}') + --depth; + ++bodyEnd; + } + + // Extract body (between braces) + if (bodyEnd > bodyStart) { + body = css.substr(bodyStart, bodyEnd - bodyStart - 1); + } + + pos = bodyEnd; + return true; +} + +} // anonymous namespace + +// String utilities implementation + +std::string CssParser::normalized(const std::string& s) { + std::string result; + result.reserve(s.size()); + + bool inSpace = true; // Start true to skip leading space + for (const char c : s) { + if (isCssWhitespace(c)) { + if (!inSpace) { + result.push_back(' '); + inSpace = true; + } + } else { + result.push_back(static_cast(std::tolower(static_cast(c)))); + inSpace = false; + } + } + + // Remove trailing space + if (!result.empty() && result.back() == ' ') { + result.pop_back(); + } + return result; +} + +std::vector CssParser::splitOnChar(const std::string& s, const char delimiter) { + std::vector parts; + size_t start = 0; + + for (size_t i = 0; i <= s.size(); ++i) { + if (i == s.size() || s[i] == delimiter) { + std::string part = s.substr(start, i - start); + std::string trimmed = normalized(part); + if (!trimmed.empty()) { + parts.push_back(trimmed); + } + start = i + 1; + } + } + return parts; +} + +std::vector CssParser::splitWhitespace(const std::string& s) { + std::vector parts; + size_t start = 0; + bool inWord = false; + + for (size_t i = 0; i <= s.size(); ++i) { + const bool isSpace = i == s.size() || isCssWhitespace(s[i]); + if (isSpace && inWord) { + 
parts.push_back(s.substr(start, i - start)); + inWord = false; + } else if (!isSpace && !inWord) { + start = i; + inWord = true; + } + } + return parts; +} + +// Property value interpreters + +CssTextAlign CssParser::interpretAlignment(const std::string& val) { + const std::string v = normalized(val); + + if (v == "left" || v == "start") return CssTextAlign::Left; + if (v == "right" || v == "end") return CssTextAlign::Right; + if (v == "center") return CssTextAlign::Center; + if (v == "justify") return CssTextAlign::Justify; + + return CssTextAlign::Left; +} + +CssFontStyle CssParser::interpretFontStyle(const std::string& val) { + const std::string v = normalized(val); + + if (v == "italic" || v == "oblique") return CssFontStyle::Italic; + return CssFontStyle::Normal; +} + +CssFontWeight CssParser::interpretFontWeight(const std::string& val) { + const std::string v = normalized(val); + + // Named values + if (v == "bold" || v == "bolder") return CssFontWeight::Bold; + if (v == "normal" || v == "lighter") return CssFontWeight::Normal; + + // Numeric values: 100-900 + // CSS spec: 400 = normal, 700 = bold + // We use: 0-400 = normal, 700+ = bold, 500-600 = normal (conservative) + char* endPtr = nullptr; + const long numericWeight = std::strtol(v.c_str(), &endPtr, 10); + + // If we parsed a number and consumed the whole string + if (endPtr != v.c_str() && *endPtr == '\0') { + return numericWeight >= 700 ? 
CssFontWeight::Bold : CssFontWeight::Normal; + } + + return CssFontWeight::Normal; +} + +CssTextDecoration CssParser::interpretDecoration(const std::string& val) { + const std::string v = normalized(val); + + // text-decoration can have multiple space-separated values + if (v.find("underline") != std::string::npos) { + return CssTextDecoration::Underline; + } + return CssTextDecoration::None; +} + +CssLength CssParser::interpretLength(const std::string& val) { + const std::string v = normalized(val); + if (v.empty()) return CssLength{}; + + // Find where the number ends + size_t unitStart = v.size(); + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + // Parse numeric value + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + if (endPtr == numPart.c_str()) return CssLength{}; // No number parsed + + // Determine unit type (preserve for deferred resolution) + auto unit = CssUnit::Pixels; + if (unitPart == "em") { + unit = CssUnit::Em; + } else if (unitPart == "rem") { + unit = CssUnit::Rem; + } else if (unitPart == "pt") { + unit = CssUnit::Points; + } + // px and unitless default to Pixels + + return CssLength{numericValue, unit}; +} + +int8_t CssParser::interpretSpacing(const std::string& val) { + const std::string v = normalized(val); + if (v.empty()) return 0; + + // For spacing, we convert to "lines" (discrete units for e-ink) + // 1em ≈ 1 line, percentages based on ~30 lines per page + + float multiplier = 0.0f; + size_t unitStart = v.size(); + + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' 
&& c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + if (unitPart == "em" || unitPart == "rem") { + multiplier = 1.0f; // 1em = 1 line + } else if (unitPart == "%") { + multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines + } else { + return 0; // Unsupported unit for spacing + } + + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + + if (endPtr == numPart.c_str()) return 0; + + int lines = static_cast(numericValue * multiplier); + + // Clamp to reasonable range (0-2 lines) + if (lines < 0) lines = 0; + if (lines > 2) lines = 2; + + return static_cast(lines); +} + +// Declaration parsing + +CssStyle CssParser::parseDeclarations(const std::string& declBlock) { + CssStyle style; + + // Split declarations by semicolon + const auto declarations = splitOnChar(declBlock, ';'); + + for (const auto& decl : declarations) { + // Find colon separator + const size_t colonPos = decl.find(':'); + if (colonPos == std::string::npos || colonPos == 0) continue; + + std::string propName = normalized(decl.substr(0, colonPos)); + std::string propValue = normalized(decl.substr(colonPos + 1)); + + if (propName.empty() || propValue.empty()) continue; + + // Match property and set value + if (propName == "text-align") { + style.textAlign = interpretAlignment(propValue); + style.defined.textAlign = 1; + } else if (propName == "font-style") { + style.fontStyle = interpretFontStyle(propValue); + style.defined.fontStyle = 1; + } else if (propName == "font-weight") { + style.fontWeight = interpretFontWeight(propValue); + style.defined.fontWeight = 1; + } else if (propName == "text-decoration" || propName == "text-decoration-line") { + style.textDecoration = interpretDecoration(propValue); + style.defined.textDecoration = 1; + } else if (propName == "text-indent") { + style.textIndent = interpretLength(propValue); + 
style.defined.textIndent = 1; + } else if (propName == "margin-top") { + style.marginTop = interpretLength(propValue); + style.defined.marginTop = 1; + } else if (propName == "margin-bottom") { + style.marginBottom = interpretLength(propValue); + style.defined.marginBottom = 1; + } else if (propName == "margin-left") { + style.marginLeft = interpretLength(propValue); + style.defined.marginLeft = 1; + } else if (propName == "margin-right") { + style.marginRight = interpretLength(propValue); + style.defined.marginRight = 1; + } else if (propName == "margin") { + // Shorthand: 1-4 values for top, right, bottom, left + const auto values = splitWhitespace(propValue); + if (!values.empty()) { + style.marginTop = interpretLength(values[0]); + style.marginRight = values.size() >= 2 ? interpretLength(values[1]) : style.marginTop; + style.marginBottom = values.size() >= 3 ? interpretLength(values[2]) : style.marginTop; + style.marginLeft = values.size() >= 4 ? interpretLength(values[3]) : style.marginRight; + style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; + } + } else if (propName == "padding-top") { + style.paddingTop = interpretLength(propValue); + style.defined.paddingTop = 1; + } else if (propName == "padding-bottom") { + style.paddingBottom = interpretLength(propValue); + style.defined.paddingBottom = 1; + } else if (propName == "padding-left") { + style.paddingLeft = interpretLength(propValue); + style.defined.paddingLeft = 1; + } else if (propName == "padding-right") { + style.paddingRight = interpretLength(propValue); + style.defined.paddingRight = 1; + } else if (propName == "padding") { + // Shorthand: 1-4 values for top, right, bottom, left + const auto values = splitWhitespace(propValue); + if (!values.empty()) { + style.paddingTop = interpretLength(values[0]); + style.paddingRight = values.size() >= 2 ? interpretLength(values[1]) : style.paddingTop; + style.paddingBottom = values.size() >= 3 ? 
interpretLength(values[2]) : style.paddingTop; + style.paddingLeft = values.size() >= 4 ? interpretLength(values[3]) : style.paddingRight; + style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = + style.defined.paddingLeft = 1; + } + } + } + + return style; +} + +// Rule processing + +void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) { + const CssStyle style = parseDeclarations(declarations); + + // Only store if any properties were set + if (!style.defined.anySet()) return; + + // Handle comma-separated selectors + const auto selectors = splitOnChar(selectorGroup, ','); + + for (const auto& sel : selectors) { + // Normalize the selector + std::string key = normalized(sel); + if (key.empty()) continue; + + // Store or merge with existing + auto it = rulesBySelector_.find(key); + if (it != rulesBySelector_.end()) { + it->second.applyOver(style); + } else { + rulesBySelector_[key] = style; + } + } +} + +// Main parsing entry point + +bool CssParser::loadFromStream(FsFile& source) { + if (!source) { + Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis()); + return false; + } + + // Read file content + const std::string content = readFileContent(source); + if (content.empty()) { + return true; // Empty file is valid + } + + // Remove comments + const std::string cleaned = stripComments(content); + + // Parse rules + size_t pos = 0; + std::string selector, body; + + while (extractNextRule(cleaned, pos, selector, body)) { + processRuleBlock(selector, body); + } + + Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size()); + return true; +} + +// Style resolution + +CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const { + CssStyle result; + const std::string tag = normalized(tagName); + + // 1. 
Apply element-level style (lowest priority) + const auto tagIt = rulesBySelector_.find(tag); + if (tagIt != rulesBySelector_.end()) { + result.applyOver(tagIt->second); + } + + // 2. Apply class styles (medium priority) + if (!classAttr.empty()) { + const auto classes = splitWhitespace(classAttr); + + for (const auto& cls : classes) { + std::string classKey = "." + normalized(cls); + + auto classIt = rulesBySelector_.find(classKey); + if (classIt != rulesBySelector_.end()) { + result.applyOver(classIt->second); + } + } + + // 3. Apply element.class styles (higher priority) + for (const auto& cls : classes) { + std::string combinedKey = tag + "." + normalized(cls); + + auto combinedIt = rulesBySelector_.find(combinedKey); + if (combinedIt != rulesBySelector_.end()) { + result.applyOver(combinedIt->second); + } + } + } + + return result; +} + +// Inline style parsing (static - doesn't need rule database) + +CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); } + +// Cache serialization + +// Cache format version - increment when format changes +constexpr uint8_t CSS_CACHE_VERSION = 1; + +bool CssParser::saveToCache(FsFile& file) const { + if (!file) { + return false; + } + + // Write version + file.write(CSS_CACHE_VERSION); + + // Write rule count + const auto ruleCount = static_cast(rulesBySelector_.size()); + file.write(reinterpret_cast(&ruleCount), sizeof(ruleCount)); + + // Write each rule: selector string + CssStyle fields + for (const auto& pair : rulesBySelector_) { + // Write selector string (length-prefixed) + const auto selectorLen = static_cast(pair.first.size()); + file.write(reinterpret_cast(&selectorLen), sizeof(selectorLen)); + file.write(reinterpret_cast(pair.first.data()), selectorLen); + + // Write CssStyle fields (all are POD types) + const CssStyle& style = pair.second; + file.write(static_cast(style.textAlign)); + file.write(static_cast(style.fontStyle)); + 
file.write(static_cast(style.fontWeight)); + file.write(static_cast(style.textDecoration)); + + // Write CssLength fields (value + unit) + auto writeLength = [&file](const CssLength& len) { + file.write(reinterpret_cast(&len.value), sizeof(len.value)); + file.write(static_cast(len.unit)); + }; + + writeLength(style.textIndent); + writeLength(style.marginTop); + writeLength(style.marginBottom); + writeLength(style.marginLeft); + writeLength(style.marginRight); + writeLength(style.paddingTop); + writeLength(style.paddingBottom); + writeLength(style.paddingLeft); + writeLength(style.paddingRight); + + // Write defined flags as uint16_t + uint16_t definedBits = 0; + if (style.defined.textAlign) definedBits |= 1 << 0; + if (style.defined.fontStyle) definedBits |= 1 << 1; + if (style.defined.fontWeight) definedBits |= 1 << 2; + if (style.defined.textDecoration) definedBits |= 1 << 3; + if (style.defined.textIndent) definedBits |= 1 << 4; + if (style.defined.marginTop) definedBits |= 1 << 5; + if (style.defined.marginBottom) definedBits |= 1 << 6; + if (style.defined.marginLeft) definedBits |= 1 << 7; + if (style.defined.marginRight) definedBits |= 1 << 8; + if (style.defined.paddingTop) definedBits |= 1 << 9; + if (style.defined.paddingBottom) definedBits |= 1 << 10; + if (style.defined.paddingLeft) definedBits |= 1 << 11; + if (style.defined.paddingRight) definedBits |= 1 << 12; + file.write(reinterpret_cast(&definedBits), sizeof(definedBits)); + } + + Serial.printf("[%lu] [CSS] Saved %u rules to cache\n", millis(), ruleCount); + return true; +} + +bool CssParser::loadFromCache(FsFile& file) { + if (!file) { + return false; + } + + // Clear existing rules + clear(); + + // Read and verify version + uint8_t version = 0; + if (file.read(&version, 1) != 1 || version != CSS_CACHE_VERSION) { + Serial.printf("[%lu] [CSS] Cache version mismatch (got %u, expected %u)\n", millis(), version, CSS_CACHE_VERSION); + return false; + } + + // Read rule count + uint16_t ruleCount = 0; 
+ if (file.read(&ruleCount, sizeof(ruleCount)) != sizeof(ruleCount)) { + return false; + } + + // Read each rule + for (uint16_t i = 0; i < ruleCount; ++i) { + // Read selector string + uint16_t selectorLen = 0; + if (file.read(&selectorLen, sizeof(selectorLen)) != sizeof(selectorLen)) { + rulesBySelector_.clear(); + return false; + } + + std::string selector; + selector.resize(selectorLen); + if (file.read(&selector[0], selectorLen) != selectorLen) { + rulesBySelector_.clear(); + return false; + } + + // Read CssStyle fields + CssStyle style; + uint8_t enumVal; + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.textAlign = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.fontStyle = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.fontWeight = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.textDecoration = static_cast(enumVal); + + // Read CssLength fields + auto readLength = [&file](CssLength& len) -> bool { + if (file.read(&len.value, sizeof(len.value)) != sizeof(len.value)) { + return false; + } + uint8_t unitVal; + if (file.read(&unitVal, 1) != 1) { + return false; + } + len.unit = static_cast(unitVal); + return true; + }; + + if (!readLength(style.textIndent) || !readLength(style.marginTop) || !readLength(style.marginBottom) || + !readLength(style.marginLeft) || !readLength(style.marginRight) || !readLength(style.paddingTop) || + !readLength(style.paddingBottom) || !readLength(style.paddingLeft) || !readLength(style.paddingRight)) { + rulesBySelector_.clear(); + return false; + } + + // Read defined flags + uint16_t definedBits = 0; + if (file.read(&definedBits, sizeof(definedBits)) != sizeof(definedBits)) { + rulesBySelector_.clear(); + return false; + } + style.defined.textAlign = (definedBits & 1 << 0) != 0; + 
style.defined.fontStyle = (definedBits & 1 << 1) != 0; + style.defined.fontWeight = (definedBits & 1 << 2) != 0; + style.defined.textDecoration = (definedBits & 1 << 3) != 0; + style.defined.textIndent = (definedBits & 1 << 4) != 0; + style.defined.marginTop = (definedBits & 1 << 5) != 0; + style.defined.marginBottom = (definedBits & 1 << 6) != 0; + style.defined.marginLeft = (definedBits & 1 << 7) != 0; + style.defined.marginRight = (definedBits & 1 << 8) != 0; + style.defined.paddingTop = (definedBits & 1 << 9) != 0; + style.defined.paddingBottom = (definedBits & 1 << 10) != 0; + style.defined.paddingLeft = (definedBits & 1 << 11) != 0; + style.defined.paddingRight = (definedBits & 1 << 12) != 0; + + rulesBySelector_[selector] = style; + } + + Serial.printf("[%lu] [CSS] Loaded %u rules from cache\n", millis(), ruleCount); + return true; +} diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h new file mode 100644 index 00000000..0e5a1b34 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.h @@ -0,0 +1,114 @@ +#pragma once + +#include + +#include +#include +#include + +#include "CssStyle.h" + +/** + * Lightweight CSS parser for EPUB stylesheets + * + * Parses CSS files and extracts styling information relevant for e-ink display. + * Uses a two-phase approach: first tokenizes the CSS content, then builds + * a rule database that can be queried during HTML parsing. + * + * Supported selectors: + * - Element selectors: p, div, h1, etc. + * - Class selectors: .classname + * - Combined: element.classname + * - Grouped: selector1, selector2 { } + * + * Not supported (silently ignored): + * - Descendant/child selectors + * - Pseudo-classes and pseudo-elements + * - Media queries (content is skipped) + * - @import, @font-face, etc. 
+ */ +class CssParser { + public: + CssParser() = default; + ~CssParser() = default; + + // Non-copyable + CssParser(const CssParser&) = delete; + CssParser& operator=(const CssParser&) = delete; + + /** + * Load and parse CSS from a file stream. + * Can be called multiple times to accumulate rules from multiple stylesheets. + * @param source Open file handle to read from + * @return true if parsing completed (even if no rules found) + */ + bool loadFromStream(FsFile& source); + + /** + * Look up the style for an HTML element, considering tag name and class attributes. + * Applies CSS cascade: element style < class style < element.class style + * + * @param tagName The HTML element name (e.g., "p", "div") + * @param classAttr The class attribute value (may contain multiple space-separated classes) + * @return Combined style with all applicable rules merged + */ + [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const; + + /** + * Parse an inline style attribute string. + * @param styleValue The value of a style="" attribute + * @return Parsed style properties + */ + [[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue); + + /** + * Check if any rules have been loaded + */ + [[nodiscard]] bool empty() const { return rulesBySelector_.empty(); } + + /** + * Get count of loaded rule sets + */ + [[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); } + + /** + * Clear all loaded rules + */ + void clear() { rulesBySelector_.clear(); } + + /** + * Save parsed CSS rules to a cache file. + * @param file Open file handle to write to + * @return true if cache was written successfully + */ + bool saveToCache(FsFile& file) const; + + /** + * Load CSS rules from a cache file. + * Clears any existing rules before loading. 
// Unit attached to a CSS length value.
enum class CssUnit : uint8_t { Pixels = 0, Em = 1, Rem = 2, Points = 3 };

// Represents a CSS length value with its unit, allowing deferred resolution to pixels
struct CssLength {
  float value = 0.0f;
  CssUnit unit = CssUnit::Pixels;

  CssLength() = default;
  CssLength(const float v, const CssUnit u) : value(v), unit(u) {}

  // Convenience constructor for pixel values (most common case)
  explicit CssLength(const float pixels) : value(pixels) {}

  // Resolve to pixels given the current em size.
  // Em and Rem both scale by emSize; Points use an approximate 1.33 px per pt.
  [[nodiscard]] float toPixels(const float emSize) const {
    switch (unit) {
      case CssUnit::Em:
      case CssUnit::Rem:
        return value * emSize;
      case CssUnit::Points:
        return value * 1.33f;  // Approximate pt to px conversion
      default:
        return value;
    }
  }

  // Resolve to int16_t pixels (for BlockStyle fields).
  // The fractional part is truncated toward zero. Results outside the int16_t
  // range saturate at INT16_MIN / INT16_MAX and NaN maps to 0, because
  // converting an out-of-range float to an integer type is undefined behaviour.
  [[nodiscard]] int16_t toPixelsInt16(const float emSize) const {
    const float px = toPixels(emSize);
    if (px != px) return 0;  // NaN is the only value that differs from itself
    if (px >= 32767.0f) return INT16_MAX;
    if (px <= -32768.0f) return INT16_MIN;
    return static_cast<int16_t>(px);
  }
};
properties +// Only stores properties relevant to e-ink text rendering +// Length values are stored as CssLength (value + unit) for deferred resolution +struct CssStyle { + CssTextAlign textAlign = CssTextAlign::Left; + CssFontStyle fontStyle = CssFontStyle::Normal; + CssFontWeight fontWeight = CssFontWeight::Normal; + CssTextDecoration textDecoration = CssTextDecoration::None; + + CssLength textIndent; // First-line indent (deferred resolution) + CssLength marginTop; // Vertical spacing before block + CssLength marginBottom; // Vertical spacing after block + CssLength marginLeft; // Horizontal spacing left of block + CssLength marginRight; // Horizontal spacing right of block + CssLength paddingTop; // Padding before + CssLength paddingBottom; // Padding after + CssLength paddingLeft; // Padding left + CssLength paddingRight; // Padding right + + CssPropertyFlags defined; // Tracks which properties were explicitly set + + // Apply properties from another style, only overwriting if the other style + // has that property explicitly defined + void applyOver(const CssStyle& base) { + if (base.hasTextAlign()) { + textAlign = base.textAlign; + defined.textAlign = 1; + } + if (base.hasFontStyle()) { + fontStyle = base.fontStyle; + defined.fontStyle = 1; + } + if (base.hasFontWeight()) { + fontWeight = base.fontWeight; + defined.fontWeight = 1; + } + if (base.hasTextDecoration()) { + textDecoration = base.textDecoration; + defined.textDecoration = 1; + } + if (base.hasTextIndent()) { + textIndent = base.textIndent; + defined.textIndent = 1; + } + if (base.hasMarginTop()) { + marginTop = base.marginTop; + defined.marginTop = 1; + } + if (base.hasMarginBottom()) { + marginBottom = base.marginBottom; + defined.marginBottom = 1; + } + if (base.hasMarginLeft()) { + marginLeft = base.marginLeft; + defined.marginLeft = 1; + } + if (base.hasMarginRight()) { + marginRight = base.marginRight; + defined.marginRight = 1; + } + if (base.hasPaddingTop()) { + paddingTop = 
base.paddingTop; + defined.paddingTop = 1; + } + if (base.hasPaddingBottom()) { + paddingBottom = base.paddingBottom; + defined.paddingBottom = 1; + } + if (base.hasPaddingLeft()) { + paddingLeft = base.paddingLeft; + defined.paddingLeft = 1; + } + if (base.hasPaddingRight()) { + paddingRight = base.paddingRight; + defined.paddingRight = 1; + } + } + + [[nodiscard]] bool hasTextAlign() const { return defined.textAlign; } + [[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; } + [[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; } + [[nodiscard]] bool hasTextDecoration() const { return defined.textDecoration; } + [[nodiscard]] bool hasTextIndent() const { return defined.textIndent; } + [[nodiscard]] bool hasMarginTop() const { return defined.marginTop; } + [[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; } + [[nodiscard]] bool hasMarginLeft() const { return defined.marginLeft; } + [[nodiscard]] bool hasMarginRight() const { return defined.marginRight; } + [[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; } + [[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; } + [[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; } + [[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; } + + void reset() { + textAlign = CssTextAlign::Left; + fontStyle = CssFontStyle::Normal; + fontWeight = CssFontWeight::Normal; + textDecoration = CssTextDecoration::None; + textIndent = CssLength{}; + marginTop = marginBottom = marginLeft = marginRight = CssLength{}; + paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{}; + defined.clearAll(); + } +}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index ac1f537f..ab93d9cb 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -22,6 +22,9 @@ constexpr int 
NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]); const char* ITALIC_TAGS[] = {"i", "em"}; constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); +const char* UNDERLINE_TAGS[] = {"u", "ins"}; +constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]); + const char* IMAGE_TAGS[] = {"img"}; constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); @@ -40,17 +43,51 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +bool isHeaderOrBlock(const char* name) { + return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); +} + +// Update effective bold/italic/underline based on block style and inline style stack +void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { + // Start with block-level styles + effectiveBold = currentCssStyle.hasFontWeight() && currentCssStyle.fontWeight == CssFontWeight::Bold; + effectiveItalic = currentCssStyle.hasFontStyle() && currentCssStyle.fontStyle == CssFontStyle::Italic; + effectiveUnderline = + currentCssStyle.hasTextDecoration() && currentCssStyle.textDecoration == CssTextDecoration::Underline; + + // Apply inline style stack in order + for (const auto& entry : inlineStyleStack) { + if (entry.hasBold) { + effectiveBold = entry.bold; + } + if (entry.hasItalic) { + effectiveItalic = entry.italic; + } + if (entry.hasUnderline) { + effectiveUnderline = entry.underline; + } + } +} + // flush the contents of partWordBuffer to currentTextBlock void ChapterHtmlSlimParser::flushPartWordBuffer() { - // determine font style + // Determine font style from depth-based tracking and CSS effective style + const bool isBold = boldUntilDepth < depth || effectiveBold; + const bool isItalic = italicUntilDepth < depth || effectiveItalic; + const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline; + + // Combine style flags using bitwise OR EpdFontFamily::Style fontStyle = 
EpdFontFamily::REGULAR; - if (boldUntilDepth < depth && italicUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (boldUntilDepth < depth) { - fontStyle = EpdFontFamily::BOLD; - } else if (italicUntilDepth < depth) { - fontStyle = EpdFontFamily::ITALIC; + if (isBold) { + fontStyle = static_cast(fontStyle | EpdFontFamily::BOLD); } + if (isItalic) { + fontStyle = static_cast(fontStyle | EpdFontFamily::ITALIC); + } + if (isUnderline) { + fontStyle = static_cast(fontStyle | EpdFontFamily::UNDERLINE); + } + // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; currentTextBlock->addWord(partWordBuffer, fontStyle); @@ -58,17 +95,20 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() { } // start a new text block if needed -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { +void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { - currentTextBlock->setStyle(style); + // Merge with existing block style to accumulate CSS styling from parent block elements. + // This handles cases like

text

where the + // div's margin should be preserved, even though it has no direct text content. + currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle)); return; } makePages(); } - currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); + currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle)); } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { @@ -80,13 +120,30 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* return; } + // Extract class and style attributes for CSS processing + std::string classAttr; + std::string styleAttr; + if (atts != nullptr) { + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "class") == 0) { + classAttr = atts[i + 1]; + } else if (strcmp(atts[i], "style") == 0) { + styleAttr = atts[i + 1]; + } + } + } + + auto centeredBlockStyle = BlockStyle(); + centeredBlockStyle.textAlignDefined = true; + centeredBlockStyle.alignment = CssTextAlign::Center; + // Special handling for tables - show placeholder text instead of dropping silently if (strcmp(name, "table") == 0) { // Add placeholder text - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, "[Table omitted]", strlen("[Table omitted]")); @@ -111,9 +168,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = 
min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, alt.c_str(), alt.length()); @@ -141,43 +198,113 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } - if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - self->startNewTextBlock(TextBlock::CENTER_ALIGN); - self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); - self->depth += 1; - return; + // Compute CSS style for this element + CssStyle cssStyle; + if (self->cssParser) { + // Get combined tag + class styles + cssStyle = self->cssParser->resolveStyle(name, classAttr); + // Merge inline style (highest priority) + if (!styleAttr.empty()) { + CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr); + cssStyle.applyOver(inlineStyle); + } } - if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { + const float emSize = static_cast(self->renderer.getLineHeight(self->fontId)) * self->lineCompression; + const auto userAlignment = static_cast(self->paragraphAlignment); + + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); + self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + self->updateEffectiveInlineStyle(); + } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { if (self->partWordBufferIndex > 0) { // flush word preceding
to currentTextBlock before calling startNewTextBlock self->flushPartWordBuffer(); } - self->startNewTextBlock(self->currentTextBlock->getStyle()); - self->depth += 1; - return; + self->startNewTextBlock(self->currentTextBlock->getBlockStyle()); + } else { + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); + self->updateEffectiveInlineStyle(); + + if (strcmp(name, "li") == 0) { + self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); + } } - - self->startNewTextBlock(static_cast(self->paragraphAlignment)); - if (strcmp(name, "li") == 0) { - self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); + } else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) { + self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth); + // Push inline style entry for underline tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasUnderline = true; + entry.underline = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; } - - self->depth += 1; - return; - } - - if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); - self->depth += 1; - return; - } - - if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { + // Push inline style entry for bold tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasBold = true; + entry.bold = true; + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) 
{ + entry.hasUnderline = true; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); - self->depth += 1; - return; + // Push inline style entry for italic tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasItalic = true; + entry.italic = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if (strcmp(name, "span") == 0 || !isHeaderOrBlock(name)) { + // Handle span and other inline elements for CSS styling + if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) { + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } } // Unprocessed tag, just increasing depth and continue forward @@ -239,17 +366,27 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { auto* self = static_cast(userData); - if (self->partWordBufferIndex > 0) { - 
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file. - // We don't want to flush out content when closing inline tags like . - // Currently this also flushes out on closing and tags, but they are line tags so that shouldn't happen, - // text styling needs to be overhauled to fix it. - const bool shouldBreakText = - matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || - strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; + // Check if any style state will change after we decrement depth + // If so, we MUST flush the partWordBuffer with the CURRENT style first + // Note: depth hasn't been decremented yet, so we check against (depth - 1) + const bool willPopStyleStack = + !self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1; + const bool willClearBold = self->boldUntilDepth == self->depth - 1; + const bool willClearItalic = self->italicUntilDepth == self->depth - 1; + const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; - if (shouldBreakText) { + const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; + const bool headerOrBlockTag = isHeaderOrBlock(name); + + // Flush buffer with current style BEFORE any style changes + if (self->partWordBufferIndex > 0) { + // Flush if style will change OR if we're closing a block/structural element + const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || + matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 || + matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; + + if (shouldFlush) { self->flushPartWordBuffer(); } } @@ -261,19 +398,40 @@ void XMLCALL 
ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n self->skipUntilDepth = INT_MAX; } - // Leaving bold + // Leaving bold tag if (self->boldUntilDepth == self->depth) { self->boldUntilDepth = INT_MAX; } - // Leaving italic + // Leaving italic tag if (self->italicUntilDepth == self->depth) { self->italicUntilDepth = INT_MAX; } + + // Leaving underline tag + if (self->underlineUntilDepth == self->depth) { + self->underlineUntilDepth = INT_MAX; + } + + // Pop from inline style stack if we pushed an entry at this depth + // This handles all inline elements: b, i, u, span, etc. + if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) { + self->inlineStyleStack.pop_back(); + self->updateEffectiveInlineStyle(); + } + + // Clear block style when leaving header or block elements + if (headerOrBlockTag) { + self->currentCssStyle.reset(); + self->updateEffectiveInlineStyle(); + } } bool ChapterHtmlSlimParser::parseAndBuildPages() { - startNewTextBlock((TextBlock::Style)this->paragraphAlignment); + auto paragraphAlignmentBlockStyle = BlockStyle(); + paragraphAlignmentBlockStyle.textAlignDefined = true; + paragraphAlignmentBlockStyle.alignment = static_cast(this->paragraphAlignment); + startNewTextBlock(paragraphAlignmentBlockStyle); const XML_Parser parser = XML_ParserCreate(nullptr); int done; @@ -362,7 +520,9 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line) { currentPageNextY = 0; } - currentPage->elements.push_back(std::make_shared(line, 0, currentPageNextY)); + // Apply horizontal left inset (margin + padding) as x position offset + const int16_t xOffset = line->getBlockStyle().leftInset(); + currentPage->elements.push_back(std::make_shared(line, xOffset, currentPageNextY)); currentPageNextY += lineHeight; } @@ -378,10 +538,34 @@ void ChapterHtmlSlimParser::makePages() { } const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; + + // Apply top spacing before the paragraph (stored in 
pixels) + const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); + if (blockStyle.marginTop > 0) { + currentPageNextY += blockStyle.marginTop; + } + if (blockStyle.paddingTop > 0) { + currentPageNextY += blockStyle.paddingTop; + } + + // Calculate effective width accounting for horizontal margins/padding + const int horizontalInset = blockStyle.totalHorizontalInset(); + const uint16_t effectiveWidth = + (horizontalInset < viewportWidth) ? static_cast(viewportWidth - horizontalInset) : viewportWidth; + currentTextBlock->layoutAndExtractLines( - renderer, fontId, viewportWidth, + renderer, fontId, effectiveWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Extra paragraph spacing if enabled + + // Apply bottom spacing after the paragraph (stored in pixels) + if (blockStyle.marginBottom > 0) { + currentPageNextY += blockStyle.marginBottom; + } + if (blockStyle.paddingBottom > 0) { + currentPageNextY += blockStyle.paddingBottom; + } + + // Extra paragraph spacing if enabled (default behavior) if (extraParagraphSpacing) { currentPageNextY += lineHeight / 2; } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 38202e6e..92a9838a 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -8,6 +8,8 @@ #include "../ParsedText.h" #include "../blocks/TextBlock.h" +#include "../css/CssParser.h" +#include "../css/CssStyle.h" class Page; class GfxRenderer; @@ -23,6 +25,7 @@ class ChapterHtmlSlimParser { int skipUntilDepth = INT_MAX; int boldUntilDepth = INT_MAX; int italicUntilDepth = INT_MAX; + int underlineUntilDepth = INT_MAX; // buffer for building up words from characters, will auto break if longer than this // leave one char at end for null pointer char partWordBuffer[MAX_WORD_SIZE + 1] = {}; @@ -37,8 +40,23 @@ class ChapterHtmlSlimParser { uint16_t viewportWidth; uint16_t viewportHeight; bool hyphenationEnabled; 
+ const CssParser* cssParser; - void startNewTextBlock(TextBlock::Style style); + // Style tracking (replaces depth-based approach) + struct StyleStackEntry { + int depth = 0; + bool hasBold = false, bold = false; + bool hasItalic = false, italic = false; + bool hasUnderline = false, underline = false; + }; + std::vector inlineStyleStack; + CssStyle currentCssStyle; + bool effectiveBold = false; + bool effectiveItalic = false; + bool effectiveUnderline = false; + + void updateEffectiveInlineStyle(); + void startNewTextBlock(const BlockStyle& blockStyle); void flushPartWordBuffer(); void makePages(); // XML callbacks @@ -52,7 +70,8 @@ class ChapterHtmlSlimParser { const uint8_t paragraphAlignment, const uint16_t viewportWidth, const uint16_t viewportHeight, const bool hyphenationEnabled, const std::function)>& completePageFn, - const std::function& popupFn = nullptr) + const std::function& popupFn = nullptr, const CssParser* cssParser = nullptr) + : filepath(filepath), renderer(renderer), fontId(fontId), @@ -63,7 +82,9 @@ class ChapterHtmlSlimParser { viewportHeight(viewportHeight), hyphenationEnabled(hyphenationEnabled), completePageFn(completePageFn), - popupFn(popupFn) {} + popupFn(popupFn), + cssParser(cssParser) {} + ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages(); void addLineToPage(std::shared_ptr line); diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index ce0e22ea..c6cdec4e 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -8,6 +8,7 @@ namespace { constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml"; +constexpr char MEDIA_TYPE_CSS[] = "text/css"; constexpr char itemCacheFile[] = "/.items.bin"; } // namespace @@ -218,6 +219,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name } } + // Collect CSS files + if (mediaType == MEDIA_TYPE_CSS) { + self->cssFiles.push_back(href); + } + // EPUB 3: Check 
for nav document (properties contains "nav") if (!properties.empty() && self->tocNavPath.empty()) { // Properties is space-separated, check if "nav" is present as a word diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index b40a3787..1253eae3 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -64,6 +64,7 @@ class ContentOpfParser final : public Print { std::string tocNavPath; // EPUB 3 nav document path std::string coverItemHref; std::string textReferenceHref; + std::vector cssFiles; // CSS stylesheet paths explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache) diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index b5aa7710..40caf18c 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -470,6 +470,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const { return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX; } +int GfxRenderer::getTextAdvanceX(const int fontId, const char* text) const { + if (fontMap.count(fontId) == 0) { + Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); + return 0; + } + + uint32_t cp; + int width = 0; + while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + width += fontMap.at(fontId).getGlyph(cp, EpdFontFamily::REGULAR)->advanceX; + } + return width; +} + int GfxRenderer::getFontAscenderSize(const int fontId) const { if (fontMap.count(fontId) == 0) { Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); diff --git a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h index 86ddc8fc..66d625f5 100644 --- a/lib/GfxRenderer/GfxRenderer.h +++ b/lib/GfxRenderer/GfxRenderer.h @@ -78,6 +78,7 @@ class GfxRenderer { void drawText(int fontId, int x, int y, const char* text, bool black = true, EpdFontFamily::Style style = 
EpdFontFamily::REGULAR) const; int getSpaceWidth(int fontId) const; + int getTextAdvanceX(int fontId, const char* text) const; int getFontAscenderSize(int fontId) const; int getLineHeight(int fontId) const; std::string truncatedText(int fontId, const char* text, int maxWidth, diff --git a/src/activities/boot_sleep/SleepActivity.cpp b/src/activities/boot_sleep/SleepActivity.cpp index 7ffc5851..39460435 100644 --- a/src/activities/boot_sleep/SleepActivity.cpp +++ b/src/activities/boot_sleep/SleepActivity.cpp @@ -238,7 +238,8 @@ void SleepActivity::renderCoverSleepScreen() const { } else if (StringUtils::checkFileExtension(APP_STATE.openEpubPath, ".epub")) { // Handle EPUB file Epub lastEpub(APP_STATE.openEpubPath, "/.crosspoint"); - if (!lastEpub.load()) { + // Skip loading css since we only need metadata here + if (!lastEpub.load(true, true)) { Serial.println("[SLP] Failed to load last epub"); return renderDefaultSleepScreen(); } diff --git a/src/activities/home/HomeActivity.cpp b/src/activities/home/HomeActivity.cpp index 678af7cb..a6aa443b 100644 --- a/src/activities/home/HomeActivity.cpp +++ b/src/activities/home/HomeActivity.cpp @@ -52,7 +52,8 @@ void HomeActivity::onEnter() { // If epub, try to load the metadata for title/author and cover if (StringUtils::checkFileExtension(lastBookTitle, ".epub")) { Epub epub(APP_STATE.openEpubPath, "/.crosspoint"); - epub.load(false); + // Skip loading css since we only need metadata here + epub.load(false, true); if (!epub.getTitle().empty()) { lastBookTitle = std::string(epub.getTitle()); }