From 94ce987f2cfb72063e1c5223bbb57273c9eb3a26 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 17 Jan 2026 17:57:04 -0500 Subject: [PATCH 01/20] feat: Add CSS parsing and CSS support in EPUBs --- lib/Epub/Epub.cpp | 57 ++ lib/Epub/Epub.h | 5 + lib/Epub/Epub/BookMetadataCache.cpp | 24 +- lib/Epub/Epub/BookMetadataCache.h | 2 + lib/Epub/Epub/ParsedText.cpp | 32 +- lib/Epub/Epub/ParsedText.h | 12 +- lib/Epub/Epub/Section.cpp | 4 +- lib/Epub/Epub/blocks/BlockStyle.h | 17 + lib/Epub/Epub/blocks/TextBlock.cpp | 86 ++- lib/Epub/Epub/blocks/TextBlock.h | 21 +- lib/Epub/Epub/css/CssParser.cpp | 503 ++++++++++++++++++ lib/Epub/Epub/css/CssParser.h | 100 ++++ lib/Epub/Epub/css/CssStyle.h | 140 +++++ .../Epub/parsers/ChapterHtmlSlimParser.cpp | 284 +++++++++- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 25 +- lib/Epub/Epub/parsers/ContentOpfParser.cpp | 6 + lib/Epub/Epub/parsers/ContentOpfParser.h | 3 + lib/GfxRenderer/GfxRenderer.cpp | 14 + lib/GfxRenderer/GfxRenderer.h | 1 + 19 files changed, 1290 insertions(+), 46 deletions(-) create mode 100644 lib/Epub/Epub/blocks/BlockStyle.h create mode 100644 lib/Epub/Epub/css/CssParser.cpp create mode 100644 lib/Epub/Epub/css/CssParser.h create mode 100644 lib/Epub/Epub/css/CssStyle.h diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index 1b337721..07a7a8ff 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -85,6 +85,9 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { tocNavItem = opfParser.tocNavPath; } + // Copy CSS files to metadata + bookMetadata.cssFiles = opfParser.cssFiles; + Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; } @@ -203,6 +206,55 @@ bool Epub::parseTocNavFile() const { return true; } +bool Epub::parseCssFiles() { + if (!bookMetadataCache || !bookMetadataCache->isLoaded()) { + Serial.printf("[%lu] [EBP] Cannot parse CSS, cache not loaded\n", millis()); + return false; + } + + // Always create CssParser - needed for inline style 
parsing even without CSS files + cssParser.reset(new CssParser()); + + const auto& cssFiles = bookMetadataCache->coreMetadata.cssFiles; + if (cssFiles.empty()) { + Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis()); + return true; + } + + for (const auto& cssPath : cssFiles) { + Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str()); + + // Extract CSS file to temp location + const auto tmpCssPath = getCachePath() + "/.tmp.css"; + FsFile tempCssFile; + if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis()); + continue; + } + if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) { + Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str()); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + tempCssFile.close(); + + // Parse the CSS file + if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis()); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + cssParser->loadFromStream(tempCssFile); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + } + + Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(), + cssFiles.size()); + return true; +} + // load in the meta data for the epub file bool Epub::load(const bool buildIfMissing) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); @@ -212,6 +264,8 @@ bool Epub::load(const bool buildIfMissing) { // Try to load existing cache first if (bookMetadataCache->load()) { + // Parse CSS files from loaded cache + parseCssFiles(); Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } @@ -299,6 +353,9 @@ bool Epub::load(const bool buildIfMissing) { return false; } + // Parse CSS files after cache reload + 
parseCssFiles(); + Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 91062aa4..af9d0bcc 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ -8,6 +8,7 @@ #include #include "Epub/BookMetadataCache.h" +#include "Epub/css/CssParser.h" class ZipFile; @@ -24,11 +25,14 @@ class Epub { std::string cachePath; // Spine and TOC cache std::unique_ptr bookMetadataCache; + // CSS parser for styling + std::unique_ptr cssParser; bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; bool parseTocNavFile() const; + bool parseCssFiles(); public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -63,4 +67,5 @@ class Epub { size_t getBookSize() const; uint8_t calculateProgress(int currentSpineIndex, float currentSpineRead) const; + const CssParser* getCssParser() const { return cssParser.get(); } }; diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 52e48098..c276c5e7 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -9,7 +9,7 @@ #include "FsHelpers.h" namespace { -constexpr uint8_t BOOK_CACHE_VERSION = 4; +constexpr uint8_t BOOK_CACHE_VERSION = 5; constexpr char bookBinFile[] = "/book.bin"; constexpr char tmpSpineBinFile[] = "/spine.bin.tmp"; constexpr char tmpTocBinFile[] = "/toc.bin.tmp"; @@ -87,8 +87,13 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta constexpr uint32_t headerASize = sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount); + // Calculate CSS files size: count + each string (length + data) + uint32_t cssFilesSize = sizeof(uint16_t); // count + for (const auto& css : metadata.cssFiles) { + cssFilesSize += sizeof(uint32_t) + css.size(); + } const uint32_t 
metadataSize = metadata.title.size() + metadata.author.size() + metadata.coverItemHref.size() + - metadata.textReferenceHref.size() + sizeof(uint32_t) * 4; + metadata.textReferenceHref.size() + sizeof(uint32_t) * 4 + cssFilesSize; const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount; const uint32_t lutOffset = headerASize + metadataSize; @@ -102,6 +107,11 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta serialization::writeString(bookFile, metadata.author); serialization::writeString(bookFile, metadata.coverItemHref); serialization::writeString(bookFile, metadata.textReferenceHref); + // CSS files + serialization::writePod(bookFile, static_cast(metadata.cssFiles.size())); + for (const auto& css : metadata.cssFiles) { + serialization::writeString(bookFile, css); + } // Loop through spine entries, writing LUT positions spineFile.seek(0); @@ -291,6 +301,16 @@ bool BookMetadataCache::load() { serialization::readString(bookFile, coreMetadata.author); serialization::readString(bookFile, coreMetadata.coverItemHref); serialization::readString(bookFile, coreMetadata.textReferenceHref); + // CSS files + uint16_t cssCount; + serialization::readPod(bookFile, cssCount); + coreMetadata.cssFiles.clear(); + coreMetadata.cssFiles.reserve(cssCount); + for (uint16_t i = 0; i < cssCount; i++) { + std::string cssPath; + serialization::readString(bookFile, cssPath); + coreMetadata.cssFiles.push_back(std::move(cssPath)); + } loaded = true; Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount); diff --git a/lib/Epub/Epub/BookMetadataCache.h b/lib/Epub/Epub/BookMetadataCache.h index 5f1862c5..7d703ad5 100644 --- a/lib/Epub/Epub/BookMetadataCache.h +++ b/lib/Epub/Epub/BookMetadataCache.h @@ -3,6 +3,7 @@ #include #include +#include class BookMetadataCache { public: @@ -11,6 +12,7 @@ class BookMetadataCache { std::string author; std::string coverItemHref; std::string 
textReferenceHref; + std::vector cssFiles; }; struct SpineEntry { diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 3c37e31b..634f1413 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -10,11 +10,12 @@ constexpr int MAX_COST = std::numeric_limits::max(); -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) { if (word.empty()) return; words.push_back(std::move(word)); wordStyles.push_back(fontStyle); + wordUnderlines.push_back(underline); } // Consumes data to minimize memory usage @@ -42,17 +43,33 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere std::vector wordWidths; wordWidths.reserve(totalWordCount); - // add em-space at the beginning of first word in paragraph to indent - if ((style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && !extraParagraphSpacing) { + // Apply text indent: either from CSS blockStyle or default em-space for justified/left-aligned + const bool shouldIndent = (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && !extraParagraphSpacing; + if (blockStyle.textIndent > 0) { + // CSS text-indent is handled via first word width adjustment + // We'll add the indent value directly to the first word's width + } else if (shouldIndent) { + // Default: add em-space at the beginning of first word in paragraph to indent std::string& first_word = words.front(); first_word.insert(0, "\xe2\x80\x83"); } auto wordsIt = words.begin(); auto wordStylesIt = wordStyles.begin(); + bool isFirst = true; while (wordsIt != words.end()) { - wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt)); + uint16_t width = renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt); + + // Add CSS text-indent to first word width + if (isFirst && blockStyle.textIndent > 0 && shouldIndent) { + width += 
static_cast(blockStyle.textIndent); + isFirst = false; + } else { + isFirst = false; + } + + wordWidths.push_back(width); std::advance(wordsIt, 1); std::advance(wordStylesIt, 1); @@ -182,14 +199,19 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const // Iterators always start at the beginning as we are moving content with splice below auto wordEndIt = words.begin(); auto wordStyleEndIt = wordStyles.begin(); + auto wordUnderlineEndIt = wordUnderlines.begin(); std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); + std::advance(wordUnderlineEndIt, lineWordCount); // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); + std::list lineWordUnderlines; + lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt); - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); + processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style, + blockStyle, std::move(lineWordUnderlines))); } diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index 4b851a94..6f417dcc 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -8,6 +8,7 @@ #include #include +#include "blocks/BlockStyle.h" #include "blocks/TextBlock.h" class GfxRenderer; @@ -15,7 +16,9 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; + std::list wordUnderlines; // Track underline per word TextBlock::Style style; + BlockStyle blockStyle; bool extraParagraphSpacing; std::vector computeLineBreaks(int pageWidth, int spaceWidth, const std::vector& wordWidths) const; @@ -25,13 +28,16 @@ class ParsedText { std::vector calculateWordWidths(const 
GfxRenderer& renderer, int fontId); public: - explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing) - : style(style), extraParagraphSpacing(extraParagraphSpacing) {} + explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, + const BlockStyle& blockStyle = BlockStyle()) + : style(style), blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing) {} ~ParsedText() = default; - void addWord(std::string word, EpdFontFamily::Style fontStyle); + void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); void setStyle(const TextBlock::Style style) { this->style = style; } + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } TextBlock::Style getStyle() const { return style; } + const BlockStyle& getBlockStyle() const { return blockStyle; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index 18b81aae..d2404328 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -7,7 +7,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 9; +constexpr uint8_t SECTION_FILE_VERSION = 10; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t); } // namespace @@ -179,7 +179,7 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn); + progressFn, epub->getCssParser()); success = visitor.parseAndBuildPages(); 
SdMan.remove(tmpHtmlPath.c_str()); diff --git a/lib/Epub/Epub/blocks/BlockStyle.h b/lib/Epub/Epub/blocks/BlockStyle.h new file mode 100644 index 00000000..2b073b63 --- /dev/null +++ b/lib/Epub/Epub/blocks/BlockStyle.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +/** + * BlockStyle - Block-level CSS properties for paragraphs + * + * Used to track margin/padding spacing and text indentation for block elements. + * Padding is treated similarly to margins for rendering purposes. + */ +struct BlockStyle { + int8_t marginTop = 0; // 0-2 lines + int8_t marginBottom = 0; // 0-2 lines + int8_t paddingTop = 0; // 0-2 lines (treated same as margin) + int8_t paddingBottom = 0; // 0-2 lines (treated same as margin) + int16_t textIndent = 0; // pixels +}; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 2a15aef0..4fa7da75 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -14,13 +14,40 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); - + auto wordUnderlineIt = wordUnderlines.begin(); for (size_t i = 0; i < words.size(); i++) { - renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt); + const int wordX = *wordXposIt + x; + renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt); + + // Draw underline if word is underlined + if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) { + const std::string& w = *wordIt; + const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt); + // y is the top of the text line; add ascender to reach baseline, then offset 2px below + const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; + + int startX = wordX; + int underlineWidth = fullWordWidth; + + // if word starts with em-space ("\xe2\x80\x83"), account for the additional indent 
before drawing the line + if (w.size() >= 3 && static_cast(w[0]) == 0xE2 && static_cast(w[1]) == 0x80 && + static_cast(w[2]) == 0x83) { + const char* visiblePtr = w.c_str() + 3; + const int prefixWidth = renderer.getIndentWidth(fontId, std::string("\xe2\x80\x83").c_str()); + const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt); + startX = wordX + prefixWidth; + underlineWidth = visibleWidth; + } + + renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true); + } std::advance(wordIt, 1); std::advance(wordStylesIt, 1); std::advance(wordXposIt, 1); + if (wordUnderlineIt != wordUnderlines.end()) { + std::advance(wordUnderlineIt, 1); + } } } @@ -37,9 +64,35 @@ bool TextBlock::serialize(FsFile& file) const { for (auto x : wordXpos) serialization::writePod(file, x); for (auto s : wordStyles) serialization::writePod(file, s); - // Block style + // Underline flags (packed as bytes, 8 words per byte) + uint8_t underlineByte = 0; + int bitIndex = 0; + auto underlineIt = wordUnderlines.begin(); + for (size_t i = 0; i < words.size(); i++) { + if (underlineIt != wordUnderlines.end() && *underlineIt) { + underlineByte |= 1 << bitIndex; + } + bitIndex++; + if (bitIndex == 8 || i == words.size() - 1) { + serialization::writePod(file, underlineByte); + underlineByte = 0; + bitIndex = 0; + } + if (underlineIt != wordUnderlines.end()) { + ++underlineIt; + } + } + + // Block style (alignment) serialization::writePod(file, style); + // Block style (margins/padding/indent) + serialization::writePod(file, blockStyle.marginTop); + serialization::writePod(file, blockStyle.marginBottom); + serialization::writePod(file, blockStyle.paddingTop); + serialization::writePod(file, blockStyle.paddingBottom); + serialization::writePod(file, blockStyle.textIndent); + return true; } @@ -48,7 +101,9 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { std::list words; std::list wordXpos; std::list wordStyles; + std::list wordUnderlines; Style 
style; + BlockStyle blockStyle; // Word count serialization::readPod(file, wc); @@ -67,8 +122,29 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& s : wordStyles) serialization::readPod(file, s); - // Block style + // Underline flags (packed as bytes, 8 words per byte) + wordUnderlines.resize(wc, false); + auto underlineIt = wordUnderlines.begin(); + const int bytesNeeded = (wc + 7) / 8; + for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) { + uint8_t underlineByte; + serialization::readPod(file, underlineByte); + for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) { + *underlineIt = (underlineByte & 1 << bit) != 0; + ++underlineIt; + } + } + + // Block style (alignment) serialization::readPod(file, style); - return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style)); + // Block style (margins/padding/indent) + serialization::readPod(file, blockStyle.marginTop); + serialization::readPod(file, blockStyle.marginBottom); + serialization::readPod(file, blockStyle.paddingTop); + serialization::readPod(file, blockStyle.paddingBottom); + serialization::readPod(file, blockStyle.textIndent); + + return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style, + blockStyle, std::move(wordUnderlines))); } diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 415a18f3..68f5c7f8 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -7,6 +7,7 @@ #include #include "Block.h" +#include "BlockStyle.h" // Represents a line of text on a page class TextBlock final : public Block { @@ -22,15 +23,31 @@ class TextBlock final : public Block { std::list words; std::list wordXpos; std::list wordStyles; + std::list wordUnderlines; // Track underline per word Style style; + BlockStyle blockStyle; public: explicit TextBlock(std::list words, 
std::list word_xpos, - std::list word_styles, const Style style) - : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {} + std::list word_styles, const Style style, + const BlockStyle& blockStyle = BlockStyle(), + std::list word_underlines = std::list()) + : words(std::move(words)), + wordXpos(std::move(word_xpos)), + wordStyles(std::move(word_styles)), + wordUnderlines(std::move(word_underlines)), + style(style), + blockStyle(blockStyle) { + // Ensure underlines list matches words list size + while (this->wordUnderlines.size() < this->words.size()) { + this->wordUnderlines.push_back(false); + } + } ~TextBlock() override = default; void setStyle(const Style style) { this->style = style; } + void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } Style getStyle() const { return style; } + const BlockStyle& getBlockStyle() const { return blockStyle; } bool isEmpty() override { return words.empty(); } void layout(GfxRenderer& renderer) override {}; // given a renderer works out where to break the words into lines diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp new file mode 100644 index 00000000..7ef3f839 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -0,0 +1,503 @@ +#include "CssParser.h" + +#include + +#include +#include + +namespace { + +// Buffer size for reading CSS files +constexpr size_t READ_BUFFER_SIZE = 512; + +// Maximum CSS file size we'll process (prevent memory issues) +constexpr size_t MAX_CSS_SIZE = 64 * 1024; + +// Check if character is CSS whitespace +bool isCssWhitespace(const char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +// Read entire file into string (with size limit) +std::string readFileContent(FsFile& file) { + std::string content; + content.reserve(std::min(static_cast(file.size()), MAX_CSS_SIZE)); + + char buffer[READ_BUFFER_SIZE]; + while (file.available() && content.size() < 
MAX_CSS_SIZE) { + const int bytesRead = file.read(buffer, sizeof(buffer)); + if (bytesRead <= 0) break; + content.append(buffer, bytesRead); + } + return content; +} + +// Remove CSS comments (/* ... */) from content +std::string stripComments(const std::string& css) { + std::string result; + result.reserve(css.size()); + + size_t pos = 0; + while (pos < css.size()) { + // Look for start of comment + if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') { + // Find end of comment + const size_t endPos = css.find("*/", pos + 2); + if (endPos == std::string::npos) { + // Unterminated comment - skip rest of file + break; + } + pos = endPos + 2; + } else { + result.push_back(css[pos]); + ++pos; + } + } + return result; +} + +// Skip @-rules (like @media, @import, @font-face) +// Returns position after the @-rule +size_t skipAtRule(const std::string& css, const size_t start) { + // Find the end - either semicolon (simple @-rule) or matching brace + size_t pos = start + 1; // Skip the '@' + + // Skip identifier + while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) { + ++pos; + } + + // Look for { or ; + int braceDepth = 0; + while (pos < css.size()) { + const char c = css[pos]; + if (c == '{') { + ++braceDepth; + } else if (c == '}') { + --braceDepth; + if (braceDepth == 0) { + return pos + 1; + } + } else if (c == ';' && braceDepth == 0) { + return pos + 1; + } + ++pos; + } + return css.size(); +} + +// Extract next rule from CSS content +// Returns true if a rule was found, with selector and body filled +bool extractNextRule(const std::string& css, size_t& pos, + std::string& selector, std::string& body) { + selector.clear(); + body.clear(); + + // Skip whitespace and @-rules until we find a regular rule + while (pos < css.size()) { + // Skip whitespace + while (pos < css.size() && isCssWhitespace(css[pos])) { + ++pos; + } + + if (pos >= css.size()) return false; + + // Handle @-rules iteratively (avoids recursion/stack overflow) + if 
(css[pos] == '@') { + pos = skipAtRule(css, pos); + continue; // Try again after skipping the @-rule + } + + break; // Found start of a regular rule + } + + if (pos >= css.size()) return false; + + // Find opening brace + const size_t bracePos = css.find('{', pos); + if (bracePos == std::string::npos) return false; + + // Extract selector (everything before the brace) + selector = css.substr(pos, bracePos - pos); + + // Find matching closing brace + int depth = 1; + const size_t bodyStart = bracePos + 1; + size_t bodyEnd = bodyStart; + + while (bodyEnd < css.size() && depth > 0) { + if (css[bodyEnd] == '{') ++depth; + else if (css[bodyEnd] == '}') --depth; + ++bodyEnd; + } + + // Extract body (between braces) + if (bodyEnd > bodyStart) { + body = css.substr(bodyStart, bodyEnd - bodyStart - 1); + } + + pos = bodyEnd; + return true; +} + +} // anonymous namespace + +// String utilities implementation + +std::string CssParser::normalized(const std::string& s) { + std::string result; + result.reserve(s.size()); + + bool inSpace = true; // Start true to skip leading space + for (const char c : s) { + if (isCssWhitespace(c)) { + if (!inSpace) { + result.push_back(' '); + inSpace = true; + } + } else { + result.push_back(static_cast(std::tolower(static_cast(c)))); + inSpace = false; + } + } + + // Remove trailing space + if (!result.empty() && result.back() == ' ') { + result.pop_back(); + } + return result; +} + +std::vector CssParser::splitOnChar(const std::string& s, const char delimiter) { + std::vector parts; + size_t start = 0; + + for (size_t i = 0; i <= s.size(); ++i) { + if (i == s.size() || s[i] == delimiter) { + std::string part = s.substr(start, i - start); + std::string trimmed = normalized(part); + if (!trimmed.empty()) { + parts.push_back(trimmed); + } + start = i + 1; + } + } + return parts; +} + +std::vector CssParser::splitWhitespace(const std::string& s) { + std::vector parts; + size_t start = 0; + bool inWord = false; + + for (size_t i = 0; i <= 
s.size(); ++i) { + const bool isSpace = i == s.size() || isCssWhitespace(s[i]); + if (isSpace && inWord) { + parts.push_back(s.substr(start, i - start)); + inWord = false; + } else if (!isSpace && !inWord) { + start = i; + inWord = true; + } + } + return parts; +} + +// Property value interpreters + +TextAlign CssParser::interpretAlignment(const std::string& val) { + const std::string v = normalized(val); + + if (v == "left" || v == "start") return TextAlign::Left; + if (v == "right" || v == "end") return TextAlign::Right; + if (v == "center") return TextAlign::Center; + if (v == "justify") return TextAlign::Justify; + + return TextAlign::None; +} + +CssFontStyle CssParser::interpretFontStyle(const std::string& val) { + const std::string v = normalized(val); + + if (v == "italic" || v == "oblique") return CssFontStyle::Italic; + return CssFontStyle::Normal; +} + +CssFontWeight CssParser::interpretFontWeight(const std::string& val) { + const std::string v = normalized(val); + + // Named values + if (v == "bold" || v == "bolder") return CssFontWeight::Bold; + if (v == "normal" || v == "lighter") return CssFontWeight::Normal; + + // Numeric values: 100-900 + // CSS spec: 400 = normal, 700 = bold + // We use: 0-400 = normal, 700+ = bold, 500-600 = normal (conservative) + char* endPtr = nullptr; + const long numericWeight = std::strtol(v.c_str(), &endPtr, 10); + + // If we parsed a number and consumed the whole string + if (endPtr != v.c_str() && *endPtr == '\0') { + return numericWeight >= 700 ? 
CssFontWeight::Bold : CssFontWeight::Normal; + } + + return CssFontWeight::Normal; +} + +CssTextDecoration CssParser::interpretDecoration(const std::string& val) { + const std::string v = normalized(val); + + // text-decoration can have multiple space-separated values + if (v.find("underline") != std::string::npos) { + return CssTextDecoration::Underline; + } + return CssTextDecoration::None; +} + +float CssParser::interpretLength(const std::string& val, const float emSize) { + const std::string v = normalized(val); + if (v.empty()) return 0.0f; + + // Determine unit and multiplier + float multiplier = 1.0f; + size_t unitStart = v.size(); + + // Find where the number ends + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + // Handle units + if (unitPart == "em" || unitPart == "rem") { + multiplier = emSize; + } else if (unitPart == "pt") { + multiplier = 1.33f; // Approximate pt to px conversion + } + // px is default (multiplier = 1.0) + + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + + if (endPtr == numPart.c_str()) return 0.0f; // No number parsed + + return numericValue * multiplier; +} + +int8_t CssParser::interpretSpacing(const std::string& val) { + const std::string v = normalized(val); + if (v.empty()) return 0; + + // For spacing, we convert to "lines" (discrete units for e-ink) + // 1em ≈ 1 line, percentages based on ~30 lines per page + + float multiplier = 0.0f; + size_t unitStart = v.size(); + + for (size_t i = 0; i < v.size(); ++i) { + const char c = v[i]; + if (!std::isdigit(c) && c != '.' 
&& c != '-' && c != '+') { + unitStart = i; + break; + } + } + + const std::string numPart = v.substr(0, unitStart); + const std::string unitPart = v.substr(unitStart); + + if (unitPart == "em" || unitPart == "rem") { + multiplier = 1.0f; // 1em = 1 line + } else if (unitPart == "%") { + multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines + } else { + return 0; // Unsupported unit for spacing + } + + char* endPtr = nullptr; + const float numericValue = std::strtof(numPart.c_str(), &endPtr); + + if (endPtr == numPart.c_str()) return 0; + + int lines = static_cast(numericValue * multiplier); + + // Clamp to reasonable range (0-2 lines) + if (lines < 0) lines = 0; + if (lines > 2) lines = 2; + + return static_cast(lines); +} + +// Declaration parsing + +CssStyle CssParser::parseDeclarations(const std::string& declBlock) { + CssStyle style; + + // Split declarations by semicolon + const auto declarations = splitOnChar(declBlock, ';'); + + for (const auto& decl : declarations) { + // Find colon separator + const size_t colonPos = decl.find(':'); + if (colonPos == std::string::npos || colonPos == 0) continue; + + std::string propName = normalized(decl.substr(0, colonPos)); + std::string propValue = normalized(decl.substr(colonPos + 1)); + + if (propName.empty() || propValue.empty()) continue; + + // Match property and set value + if (propName == "text-align") { + const TextAlign align = interpretAlignment(propValue); + if (align != TextAlign::None) { + style.alignment = align; + style.defined.alignment = 1; + } + } else if (propName == "font-style") { + style.fontStyle = interpretFontStyle(propValue); + style.defined.fontStyle = 1; + } else if (propName == "font-weight") { + style.fontWeight = interpretFontWeight(propValue); + style.defined.fontWeight = 1; + } else if (propName == "text-decoration" || propName == "text-decoration-line") { + style.decoration = interpretDecoration(propValue); + style.defined.decoration = 1; + } else if (propName == "text-indent") { 
+ style.indentPixels = interpretLength(propValue); + style.defined.indent = 1; + } else if (propName == "margin-top") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.marginTop = spacing; + style.defined.marginTop = 1; + } + } else if (propName == "margin-bottom") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.marginBottom = spacing; + style.defined.marginBottom = 1; + } + } else if (propName == "padding-top") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.paddingTop = spacing; + style.defined.paddingTop = 1; + } + } else if (propName == "padding-bottom") { + const int8_t spacing = interpretSpacing(propValue); + if (spacing > 0) { + style.paddingBottom = spacing; + style.defined.paddingBottom = 1; + } + } + } + + return style; +} + +// Rule processing + +void CssParser::processRuleBlock(const std::string& selectorGroup, + const std::string& declarations) { + const CssStyle style = parseDeclarations(declarations); + + // Only store if any properties were set + if (!style.defined.anySet()) return; + + // Handle comma-separated selectors + const auto selectors = splitOnChar(selectorGroup, ','); + + for (const auto& sel : selectors) { + // Normalize the selector + std::string key = normalized(sel); + if (key.empty()) continue; + + // Store or merge with existing + auto it = rulesBySelector_.find(key); + if (it != rulesBySelector_.end()) { + it->second.applyOver(style); + } else { + rulesBySelector_[key] = style; + } + } +} + +// Main parsing entry point + +bool CssParser::loadFromStream(FsFile& source) { + if (!source) { + Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis()); + return false; + } + + // Read file content + const std::string content = readFileContent(source); + if (content.empty()) { + return true; // Empty file is valid + } + + // Remove comments + const std::string cleaned = stripComments(content); + + // Parse rules + 
size_t pos = 0; + std::string selector, body; + + while (extractNextRule(cleaned, pos, selector, body)) { + processRuleBlock(selector, body); + } + + Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size()); + return true; +} + +// Style resolution + +CssStyle CssParser::resolveStyle(const std::string& tagName, + const std::string& classAttr) const { + CssStyle result; + const std::string tag = normalized(tagName); + + // 1. Apply element-level style (lowest priority) + const auto tagIt = rulesBySelector_.find(tag); + if (tagIt != rulesBySelector_.end()) { + result.applyOver(tagIt->second); + } + + // 2. Apply class styles (medium priority) + if (!classAttr.empty()) { + const auto classes = splitWhitespace(classAttr); + + for (const auto& cls : classes) { + std::string classKey = "." + normalized(cls); + + auto classIt = rulesBySelector_.find(classKey); + if (classIt != rulesBySelector_.end()) { + result.applyOver(classIt->second); + } + } + + // 3. Apply element.class styles (higher priority) + for (const auto& cls : classes) { + std::string combinedKey = tag + "." + normalized(cls); + + auto combinedIt = rulesBySelector_.find(combinedKey); + if (combinedIt != rulesBySelector_.end()) { + result.applyOver(combinedIt->second); + } + } + } + + return result; +} + +// Inline style parsing (static - doesn't need rule database) + +CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { + return parseDeclarations(styleValue); +} diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h new file mode 100644 index 00000000..a10e9027 --- /dev/null +++ b/lib/Epub/Epub/css/CssParser.h @@ -0,0 +1,100 @@ +#pragma once + +#include + +#include +#include +#include + +#include "CssStyle.h" + +/** + * Lightweight CSS parser for EPUB stylesheets + * + * Parses CSS files and extracts styling information relevant for e-ink display. 
+ * Uses a two-phase approach: first tokenizes the CSS content, then builds + * a rule database that can be queried during HTML parsing. + * + * Supported selectors: + * - Element selectors: p, div, h1, etc. + * - Class selectors: .classname + * - Combined: element.classname + * - Grouped: selector1, selector2 { } + * + * Not supported (silently ignored): + * - Descendant/child selectors + * - Pseudo-classes and pseudo-elements + * - Media queries (content is skipped) + * - @import, @font-face, etc. + */ +class CssParser { + public: + CssParser() = default; + ~CssParser() = default; + + // Non-copyable + CssParser(const CssParser&) = delete; + CssParser& operator=(const CssParser&) = delete; + + /** + * Load and parse CSS from a file stream. + * Can be called multiple times to accumulate rules from multiple stylesheets. + * @param source Open file handle to read from + * @return true if parsing completed (even if no rules found) + */ + bool loadFromStream(FsFile& source); + + /** + * Look up the style for an HTML element, considering tag name and class attributes. + * Applies CSS cascade: element style < class style < element.class style + * + * @param tagName The HTML element name (e.g., "p", "div") + * @param classAttr The class attribute value (may contain multiple space-separated classes) + * @return Combined style with all applicable rules merged + */ + [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, + const std::string& classAttr) const; + + /** + * Parse an inline style attribute string. 
+ * @param styleValue The value of a style="" attribute + * @return Parsed style properties + */ + [[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue); + + /** + * Check if any rules have been loaded + */ + [[nodiscard]] bool empty() const { return rulesBySelector_.empty(); } + + /** + * Get count of loaded rule sets + */ + [[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); } + + /** + * Clear all loaded rules + */ + void clear() { rulesBySelector_.clear(); } + + private: + // Storage: maps normalized selector -> style properties + std::unordered_map rulesBySelector_; + + // Internal parsing helpers + void processRuleBlock(const std::string& selectorGroup, const std::string& declarations); + static CssStyle parseDeclarations(const std::string& declBlock); + + // Individual property value parsers + static TextAlign interpretAlignment(const std::string& val); + static CssFontStyle interpretFontStyle(const std::string& val); + static CssFontWeight interpretFontWeight(const std::string& val); + static CssTextDecoration interpretDecoration(const std::string& val); + static float interpretLength(const std::string& val, float emSize = 16.0f); + static int8_t interpretSpacing(const std::string& val); + + // String utilities + static std::string normalized(const std::string& s); + static std::vector splitOnChar(const std::string& s, char delimiter); + static std::vector splitWhitespace(const std::string& s); +}; diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h new file mode 100644 index 00000000..ea6a9d3b --- /dev/null +++ b/lib/Epub/Epub/css/CssStyle.h @@ -0,0 +1,140 @@ +#pragma once + +#include + +// Text alignment options matching CSS text-align property +enum class TextAlign : uint8_t { + None = 0, + Left = 1, + Right = 2, + Center = 3, + Justify = 4 +}; + +// Font style options matching CSS font-style property +enum class CssFontStyle : uint8_t { + Normal = 0, + Italic = 1 +}; + +// Font weight 
options - CSS supports 100-900, we simplify to normal/bold +enum class CssFontWeight : uint8_t { + Normal = 0, + Bold = 1 +}; + +// Text decoration options +enum class CssTextDecoration : uint8_t { + None = 0, + Underline = 1 +}; + +// Bitmask for tracking which properties have been explicitly set +struct CssPropertyFlags { + uint16_t alignment : 1; + uint16_t fontStyle : 1; + uint16_t fontWeight : 1; + uint16_t decoration : 1; + uint16_t indent : 1; + uint16_t marginTop : 1; + uint16_t marginBottom : 1; + uint16_t paddingTop : 1; + uint16_t paddingBottom : 1; + uint16_t reserved : 7; + + CssPropertyFlags() : alignment(0), fontStyle(0), fontWeight(0), decoration(0), + indent(0), marginTop(0), marginBottom(0), + paddingTop(0), paddingBottom(0), reserved(0) {} + + [[nodiscard]] bool anySet() const { + return alignment || fontStyle || fontWeight || decoration || + indent || marginTop || marginBottom || paddingTop || paddingBottom; + } + + void clearAll() { + alignment = fontStyle = fontWeight = decoration = indent = 0; + marginTop = marginBottom = paddingTop = paddingBottom = 0; + } +}; + +// Represents a collection of CSS style properties +// Only stores properties relevant to e-ink text rendering +struct CssStyle { + TextAlign alignment = TextAlign::None; + CssFontStyle fontStyle = CssFontStyle::Normal; + CssFontWeight fontWeight = CssFontWeight::Normal; + CssTextDecoration decoration = CssTextDecoration::None; + + float indentPixels = 0.0f; // First-line indent in pixels + int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) + int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2) + int8_t paddingTop = 0; // Padding before (in lines, 0-2) + int8_t paddingBottom = 0; // Padding after (in lines, 0-2) + + CssPropertyFlags defined; // Tracks which properties were explicitly set + + // Apply properties from another style, only overwriting if the other style + // has that property explicitly defined + void applyOver(const CssStyle& 
base) { + if (base.defined.alignment) { + alignment = base.alignment; + defined.alignment = 1; + } + if (base.defined.fontStyle) { + fontStyle = base.fontStyle; + defined.fontStyle = 1; + } + if (base.defined.fontWeight) { + fontWeight = base.fontWeight; + defined.fontWeight = 1; + } + if (base.defined.decoration) { + decoration = base.decoration; + defined.decoration = 1; + } + if (base.defined.indent) { + indentPixels = base.indentPixels; + defined.indent = 1; + } + if (base.defined.marginTop) { + marginTop = base.marginTop; + defined.marginTop = 1; + } + if (base.defined.marginBottom) { + marginBottom = base.marginBottom; + defined.marginBottom = 1; + } + if (base.defined.paddingTop) { + paddingTop = base.paddingTop; + defined.paddingTop = 1; + } + if (base.defined.paddingBottom) { + paddingBottom = base.paddingBottom; + defined.paddingBottom = 1; + } + } + + // Compatibility accessors for existing code that uses hasX pattern + [[nodiscard]] bool hasTextAlign() const { return defined.alignment; } + [[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; } + [[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; } + [[nodiscard]] bool hasTextDecoration() const { return defined.decoration; } + [[nodiscard]] bool hasTextIndent() const { return defined.indent; } + [[nodiscard]] bool hasMarginTop() const { return defined.marginTop; } + [[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; } + [[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; } + [[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; } + + // Merge another style (alias for applyOver for compatibility) + void merge(const CssStyle& other) { applyOver(other); } + + void reset() { + alignment = TextAlign::None; + fontStyle = CssFontStyle::Normal; + fontWeight = CssFontWeight::Normal; + decoration = CssTextDecoration::None; + indentPixels = 0.0f; + marginTop = marginBottom = paddingTop = paddingBottom = 0; + 
defined.clearAll(); + } +}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index acddd81d..0143a56e 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -22,6 +22,9 @@ constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]); const char* ITALIC_TAGS[] = {"i", "em"}; constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); +const char* UNDERLINE_TAGS[] = {"u", "ins"}; +constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]); + const char* IMAGE_TAGS[] = {"img"}; constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); @@ -40,18 +43,55 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } +// Create a BlockStyle from CSS style properties +BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) { + BlockStyle blockStyle; + blockStyle.marginTop = static_cast(cssStyle.marginTop + cssStyle.paddingTop); + blockStyle.marginBottom = static_cast(cssStyle.marginBottom + cssStyle.paddingBottom); + blockStyle.paddingTop = cssStyle.paddingTop; + blockStyle.paddingBottom = cssStyle.paddingBottom; + blockStyle.textIndent = static_cast(cssStyle.indentPixels); + return blockStyle; +} + +// Update effective bold/italic/underline based on block style and inline style stack +void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { + // Start with block-level styles + effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; + effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic; + effectiveUnderline = currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; + + // Apply inline style stack in order + for (const auto& entry : inlineStyleStack) { + if (entry.hasBold) { + effectiveBold 
= entry.bold; + } + if (entry.hasItalic) { + effectiveItalic = entry.italic; + } + if (entry.hasUnderline) { + effectiveUnderline = entry.underline; + } + } +} + // start a new text block if needed -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { currentTextBlock->setStyle(style); + currentTextBlock->setBlockStyle(blockStyle); return; } makePages(); } - currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing)); + currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, blockStyle)); +} + +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { + startNewTextBlock(style, BlockStyle{}); } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { @@ -63,6 +103,19 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* return; } + // Extract class and style attributes for CSS processing + std::string classAttr; + std::string styleAttr; + if (atts != nullptr) { + for (int i = 0; atts[i]; i += 2) { + if (strcmp(atts[i], "class") == 0) { + classAttr = atts[i + 1]; + } else if (strcmp(atts[i], "style") == 0) { + styleAttr = atts[i + 1]; + } + } + } + // Special handling for tables - show placeholder text instead of dropping silently if (strcmp(name, "table") == 0) { // Add placeholder text @@ -120,22 +173,152 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } + // Determine if this is a block element + bool isBlockElement = + matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); + + // Compute CSS style for this element + CssStyle cssStyle; + if (self->cssParser) { + // Get combined tag + class styles + cssStyle = 
self->cssParser->resolveStyle(name, classAttr); + // Merge inline style (highest priority) + if (!styleAttr.empty()) { + CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr); + cssStyle.merge(inlineStyle); + } + } + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + // Headers: center aligned, bold, apply CSS overrides + TextBlock::Style alignment = TextBlock::CENTER_ALIGN; + if (cssStyle.hasTextAlign()) { + switch (cssStyle.alignment) { + case TextAlign::Left: + alignment = TextBlock::LEFT_ALIGN; + break; + case TextAlign::Right: + alignment = TextBlock::RIGHT_ALIGN; + break; + case TextAlign::Center: + alignment = TextBlock::CENTER_ALIGN; + break; + case TextAlign::Justify: + alignment = TextBlock::JUSTIFIED; + break; + default: + break; + } + } + + self->currentBlockStyle = cssStyle; + self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle)); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + self->updateEffectiveInlineStyle(); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (strcmp(name, "br") == 0) { self->startNewTextBlock(self->currentTextBlock->getStyle()); } else { - self->startNewTextBlock((TextBlock::Style)self->paragraphAlignment); + // Determine alignment from CSS or default + auto alignment = static_cast(self->paragraphAlignment); + if (cssStyle.hasTextAlign()) { + switch (cssStyle.alignment) { + case TextAlign::Left: + alignment = TextBlock::LEFT_ALIGN; + break; + case TextAlign::Right: + alignment = TextBlock::RIGHT_ALIGN; + break; + case TextAlign::Center: + alignment = TextBlock::CENTER_ALIGN; + break; + case TextAlign::Justify: + alignment = TextBlock::JUSTIFIED; + break; + default: + break; + } + } + + self->currentBlockStyle = cssStyle; + self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle)); + self->updateEffectiveInlineStyle(); + if (strcmp(name, "li") == 0) { self->currentTextBlock->addWord("\xe2\x80\xa2", 
EpdFontFamily::REGULAR); } } + } else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) { + self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth); + // Push inline style entry for underline tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasUnderline = true; + entry.underline = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); } else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); + // Push inline style entry for bold tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasBold = true; + entry.bold = true; + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); } else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); + // Push inline style entry for italic tag + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + entry.hasItalic = true; + entry.italic = true; + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } else if 
(strcmp(name, "span") == 0 || !isBlockElement) { + // Handle span and other inline elements for CSS styling + if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) { + StyleStackEntry entry; + entry.depth = self->depth; // Track depth for matching pop + if (cssStyle.hasFontWeight()) { + entry.hasBold = true; + entry.bold = cssStyle.fontWeight == CssFontWeight::Bold; + } + if (cssStyle.hasFontStyle()) { + entry.hasItalic = true; + entry.italic = cssStyle.fontStyle == CssFontStyle::Italic; + } + if (cssStyle.hasTextDecoration()) { + entry.hasUnderline = true; + entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + } + self->inlineStyleStack.push_back(entry); + self->updateEffectiveInlineStyle(); + } } self->depth += 1; @@ -149,12 +332,17 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char return; } + // Determine font style from depth-based tracking and CSS effective style + const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold; + const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic; + const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline; + EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + if (isBold && isItalic) { fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (self->boldUntilDepth < self->depth) { + } else if (isBold) { fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { + } else if (isItalic) { fontStyle = EpdFontFamily::ITALIC; } @@ -163,7 +351,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; - 
self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline); self->partWordBufferIndex = 0; } // Skip the whitespace char @@ -202,7 +390,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= MAX_WORD_SIZE) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline); self->partWordBufferIndex = 0; } @@ -224,27 +412,42 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { auto* self = static_cast(userData); - if (self->partWordBufferIndex > 0) { - // Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file. - // We don't want to flush out content when closing inline tags like . - // Currently this also flushes out on closing and tags, but they are line tags so that shouldn't happen, - // text styling needs to be overhauled to fix it. 
- const bool shouldBreakText = - matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || self->depth == 1; + // Check if any style state will change after we decrement depth + // If so, we MUST flush the partWordBuffer with the CURRENT style first + // Note: depth hasn't been decremented yet, so we check against (depth - 1) + const bool willPopStyleStack = !self->inlineStyleStack.empty() && + self->inlineStyleStack.back().depth == self->depth - 1; + const bool willClearBold = self->boldUntilDepth == self->depth - 1; + const bool willClearItalic = self->italicUntilDepth == self->depth - 1; + const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; + + const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; + + // Flush buffer with current style BEFORE any style changes + if (self->partWordBufferIndex > 0) { + // Flush if style will change OR if we're closing a block/structural element + const bool shouldFlush = styleWillChange || + matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || + matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1; + + if (shouldFlush) { + // Use combined depth-based and CSS-based style + const bool isBold = self->boldUntilDepth < self->depth || self->effectiveBold; + const bool isItalic = self->italicUntilDepth < self->depth || self->effectiveItalic; + const bool isUnderline = self->underlineUntilDepth < self->depth || self->effectiveUnderline; - if (shouldBreakText) { EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (self->boldUntilDepth < self->depth && self->italicUntilDepth < self->depth) { + if (isBold && isItalic) { fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if 
(self->boldUntilDepth < self->depth) { + } else if (isBold) { fontStyle = EpdFontFamily::BOLD; - } else if (self->italicUntilDepth < self->depth) { + } else if (isItalic) { fontStyle = EpdFontFamily::ITALIC; } self->partWordBuffer[self->partWordBufferIndex] = '\0'; - self->currentTextBlock->addWord(self->partWordBuffer, fontStyle); + self->currentTextBlock->addWord(self->partWordBuffer, fontStyle, isUnderline); self->partWordBufferIndex = 0; } } @@ -256,15 +459,33 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n self->skipUntilDepth = INT_MAX; } - // Leaving bold + // Leaving bold tag if (self->boldUntilDepth == self->depth) { self->boldUntilDepth = INT_MAX; } - // Leaving italic + // Leaving italic tag if (self->italicUntilDepth == self->depth) { self->italicUntilDepth = INT_MAX; } + + // Leaving underline tag + if (self->underlineUntilDepth == self->depth) { + self->underlineUntilDepth = INT_MAX; + } + + // Pop from inline style stack if we pushed an entry at this depth + // This handles all inline elements: b, i, u, span, etc. 
+ if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) { + self->inlineStyleStack.pop_back(); + self->updateEffectiveInlineStyle(); + } + + // Clear block style when leaving block elements + if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { + self->currentBlockStyle.reset(); + self->updateEffectiveInlineStyle(); + } } bool ChapterHtmlSlimParser::parseAndBuildPages() { @@ -384,10 +605,23 @@ void ChapterHtmlSlimParser::makePages() { } const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; + + // Apply marginTop before the paragraph + const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); + if (blockStyle.marginTop > 0) { + currentPageNextY += lineHeight * blockStyle.marginTop; + } + currentTextBlock->layoutAndExtractLines( renderer, fontId, viewportWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Extra paragraph spacing if enabled + + // Apply marginBottom after the paragraph + if (blockStyle.marginBottom > 0) { + currentPageNextY += lineHeight * blockStyle.marginBottom; + } + + // Extra paragraph spacing if enabled (default behavior) if (extraParagraphSpacing) { currentPageNextY += lineHeight / 2; } diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index c559e157..c3f963c7 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -8,6 +8,8 @@ #include "../ParsedText.h" #include "../blocks/TextBlock.h" +#include "../css/CssParser.h" +#include "../css/CssStyle.h" class Page; class GfxRenderer; @@ -23,6 +25,7 @@ class ChapterHtmlSlimParser { int skipUntilDepth = INT_MAX; int boldUntilDepth = INT_MAX; int italicUntilDepth = INT_MAX; + int underlineUntilDepth = INT_MAX; // buffer for building up words from characters, will auto break if longer than this // leave one char at end for null pointer char 
partWordBuffer[MAX_WORD_SIZE + 1] = {}; @@ -36,8 +39,24 @@ class ChapterHtmlSlimParser { uint8_t paragraphAlignment; uint16_t viewportWidth; uint16_t viewportHeight; + const CssParser* cssParser; + // Style tracking (replaces depth-based approach) + struct StyleStackEntry { + int depth = 0; + bool hasBold = false, bold = false; + bool hasItalic = false, italic = false; + bool hasUnderline = false, underline = false; + }; + std::vector inlineStyleStack; + CssStyle currentBlockStyle; + bool effectiveBold = false; + bool effectiveItalic = false; + bool effectiveUnderline = false; + + void updateEffectiveInlineStyle(); void startNewTextBlock(TextBlock::Style style); + void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle); void makePages(); // XML callbacks static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts); @@ -50,7 +69,8 @@ class ChapterHtmlSlimParser { const uint8_t paragraphAlignment, const uint16_t viewportWidth, const uint16_t viewportHeight, const std::function)>& completePageFn, - const std::function& progressFn = nullptr) + const std::function& progressFn = nullptr, + const CssParser* cssParser = nullptr) : filepath(filepath), renderer(renderer), fontId(fontId), @@ -60,7 +80,8 @@ class ChapterHtmlSlimParser { viewportWidth(viewportWidth), viewportHeight(viewportHeight), completePageFn(completePageFn), - progressFn(progressFn) {} + progressFn(progressFn), + cssParser(cssParser) {} ~ChapterHtmlSlimParser() = default; bool parseAndBuildPages(); void addLineToPage(std::shared_ptr line); diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.cpp b/lib/Epub/Epub/parsers/ContentOpfParser.cpp index aee7e57b..d403e761 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.cpp +++ b/lib/Epub/Epub/parsers/ContentOpfParser.cpp @@ -8,6 +8,7 @@ namespace { constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml"; +constexpr char MEDIA_TYPE_CSS[] = "text/css"; constexpr char itemCacheFile[] = "/.items.bin"; } // 
namespace @@ -192,6 +193,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name } } + // Collect CSS files + if (mediaType == MEDIA_TYPE_CSS) { + self->cssFiles.push_back(href); + } + // EPUB 3: Check for nav document (properties contains "nav") if (!properties.empty() && self->tocNavPath.empty()) { // Properties is space-separated, check if "nav" is present as a word diff --git a/lib/Epub/Epub/parsers/ContentOpfParser.h b/lib/Epub/Epub/parsers/ContentOpfParser.h index 1940aaaf..317f58e8 100644 --- a/lib/Epub/Epub/parsers/ContentOpfParser.h +++ b/lib/Epub/Epub/parsers/ContentOpfParser.h @@ -1,6 +1,8 @@ #pragma once #include +#include + #include "Epub.h" #include "expat.h" @@ -38,6 +40,7 @@ class ContentOpfParser final : public Print { std::string tocNavPath; // EPUB 3 nav document path std::string coverItemHref; std::string textReferenceHref; + std::vector cssFiles; // CSS stylesheet paths explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize, BookMetadataCache* cache) diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index 7072fed8..f35eb026 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -449,6 +449,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const { return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX; } +int GfxRenderer::getIndentWidth(const int fontId, const char* text) const { + if (fontMap.count(fontId) == 0) { + Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); + return 0; + } + + uint32_t cp; + int width = 0; + while ((cp = utf8NextCodepoint(reinterpret_cast(&text)))) { + width += fontMap.at(fontId).getGlyph(cp, EpdFontFamily::REGULAR)->advanceX; + } + return width; +} + int GfxRenderer::getFontAscenderSize(const int fontId) const { if (fontMap.count(fontId) == 0) { Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); diff --git 
a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h index b1fea69b..ae9f483d 100644 --- a/lib/GfxRenderer/GfxRenderer.h +++ b/lib/GfxRenderer/GfxRenderer.h @@ -78,6 +78,7 @@ class GfxRenderer { void drawText(int fontId, int x, int y, const char* text, bool black = true, EpdFontFamily::Style style = EpdFontFamily::REGULAR) const; int getSpaceWidth(int fontId) const; + int getIndentWidth(int fontId, const char* text) const; int getFontAscenderSize(int fontId) const; int getLineHeight(int fontId) const; std::string truncatedText(int fontId, const char* text, int maxWidth, From be10b90a71377fb0637206051eb80887f65b456f Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 17 Jan 2026 18:35:44 -0500 Subject: [PATCH 02/20] formatting: run clang-format-fix --- lib/Epub/Epub/Section.cpp | 4 +- lib/Epub/Epub/blocks/TextBlock.h | 3 +- lib/Epub/Epub/css/CssParser.cpp | 23 ++++---- lib/Epub/Epub/css/CssParser.h | 3 +- lib/Epub/Epub/css/CssStyle.h | 53 ++++++++----------- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 22 ++++---- 6 files changed, 46 insertions(+), 62 deletions(-) diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index d2404328..c90a3a6e 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -178,8 +178,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, - [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn, epub->getCssParser()); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn, + epub->getCssParser()); success = visitor.parseAndBuildPages(); SdMan.remove(tmpHtmlPath.c_str()); diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 68f5c7f8..e7993fe8 100644 --- 
a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -30,8 +30,7 @@ class TextBlock final : public Block { public: explicit TextBlock(std::list words, std::list word_xpos, std::list word_styles, const Style style, - const BlockStyle& blockStyle = BlockStyle(), - std::list word_underlines = std::list()) + const BlockStyle& blockStyle = BlockStyle(), std::list word_underlines = std::list()) : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index 7ef3f839..b62f0b57 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -14,9 +14,7 @@ constexpr size_t READ_BUFFER_SIZE = 512; constexpr size_t MAX_CSS_SIZE = 64 * 1024; // Check if character is CSS whitespace -bool isCssWhitespace(const char c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; -} +bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; } // Read entire file into string (with size limit) std::string readFileContent(FsFile& file) { @@ -88,8 +86,7 @@ size_t skipAtRule(const std::string& css, const size_t start) { // Extract next rule from CSS content // Returns true if a rule was found, with selector and body filled -bool extractNextRule(const std::string& css, size_t& pos, - std::string& selector, std::string& body) { +bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) { selector.clear(); body.clear(); @@ -126,8 +123,10 @@ bool extractNextRule(const std::string& css, size_t& pos, size_t bodyEnd = bodyStart; while (bodyEnd < css.size() && depth > 0) { - if (css[bodyEnd] == '{') ++depth; - else if (css[bodyEnd] == '}') --depth; + if (css[bodyEnd] == '{') + ++depth; + else if (css[bodyEnd] == '}') + --depth; ++bodyEnd; } @@ -402,8 +401,7 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { // 
Rule processing -void CssParser::processRuleBlock(const std::string& selectorGroup, - const std::string& declarations) { +void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) { const CssStyle style = parseDeclarations(declarations); // Only store if any properties were set @@ -458,8 +456,7 @@ bool CssParser::loadFromStream(FsFile& source) { // Style resolution -CssStyle CssParser::resolveStyle(const std::string& tagName, - const std::string& classAttr) const { +CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const { CssStyle result; const std::string tag = normalized(tagName); @@ -498,6 +495,4 @@ CssStyle CssParser::resolveStyle(const std::string& tagName, // Inline style parsing (static - doesn't need rule database) -CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { - return parseDeclarations(styleValue); -} +CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); } diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h index a10e9027..a1802369 100644 --- a/lib/Epub/Epub/css/CssParser.h +++ b/lib/Epub/Epub/css/CssParser.h @@ -52,8 +52,7 @@ class CssParser { * @param classAttr The class attribute value (may contain multiple space-separated classes) * @return Combined style with all applicable rules merged */ - [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, - const std::string& classAttr) const; + [[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const; /** * Parse an inline style attribute string. 
diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h index ea6a9d3b..83331617 100644 --- a/lib/Epub/Epub/css/CssStyle.h +++ b/lib/Epub/Epub/css/CssStyle.h @@ -3,31 +3,16 @@ #include // Text alignment options matching CSS text-align property -enum class TextAlign : uint8_t { - None = 0, - Left = 1, - Right = 2, - Center = 3, - Justify = 4 -}; +enum class TextAlign : uint8_t { None = 0, Left = 1, Right = 2, Center = 3, Justify = 4 }; // Font style options matching CSS font-style property -enum class CssFontStyle : uint8_t { - Normal = 0, - Italic = 1 -}; +enum class CssFontStyle : uint8_t { Normal = 0, Italic = 1 }; // Font weight options - CSS supports 100-900, we simplify to normal/bold -enum class CssFontWeight : uint8_t { - Normal = 0, - Bold = 1 -}; +enum class CssFontWeight : uint8_t { Normal = 0, Bold = 1 }; // Text decoration options -enum class CssTextDecoration : uint8_t { - None = 0, - Underline = 1 -}; +enum class CssTextDecoration : uint8_t { None = 0, Underline = 1 }; // Bitmask for tracking which properties have been explicitly set struct CssPropertyFlags { @@ -42,13 +27,21 @@ struct CssPropertyFlags { uint16_t paddingBottom : 1; uint16_t reserved : 7; - CssPropertyFlags() : alignment(0), fontStyle(0), fontWeight(0), decoration(0), - indent(0), marginTop(0), marginBottom(0), - paddingTop(0), paddingBottom(0), reserved(0) {} + CssPropertyFlags() + : alignment(0), + fontStyle(0), + fontWeight(0), + decoration(0), + indent(0), + marginTop(0), + marginBottom(0), + paddingTop(0), + paddingBottom(0), + reserved(0) {} [[nodiscard]] bool anySet() const { - return alignment || fontStyle || fontWeight || decoration || - indent || marginTop || marginBottom || paddingTop || paddingBottom; + return alignment || fontStyle || fontWeight || decoration || indent || marginTop || marginBottom || paddingTop || + paddingBottom; } void clearAll() { @@ -65,13 +58,13 @@ struct CssStyle { CssFontWeight fontWeight = CssFontWeight::Normal; CssTextDecoration 
decoration = CssTextDecoration::None; - float indentPixels = 0.0f; // First-line indent in pixels - int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) - int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2) - int8_t paddingTop = 0; // Padding before (in lines, 0-2) - int8_t paddingBottom = 0; // Padding after (in lines, 0-2) + float indentPixels = 0.0f; // First-line indent in pixels + int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) + int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2) + int8_t paddingTop = 0; // Padding before (in lines, 0-2) + int8_t paddingBottom = 0; // Padding after (in lines, 0-2) - CssPropertyFlags defined; // Tracks which properties were explicitly set + CssPropertyFlags defined; // Tracks which properties were explicitly set // Apply properties from another style, only overwriting if the other style // has that property explicitly defined diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 0143a56e..923ed68f 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -59,7 +59,8 @@ void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { // Start with block-level styles effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic; - effectiveUnderline = currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; + effectiveUnderline = + currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; // Apply inline style stack in order for (const auto& entry : inlineStyleStack) { @@ -90,9 +91,7 @@ void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, cons currentTextBlock.reset(new 
ParsedText(style, extraParagraphSpacing, blockStyle)); } -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { - startNewTextBlock(style, BlockStyle{}); -} +void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); } void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); @@ -174,8 +173,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } // Determine if this is a block element - bool isBlockElement = - matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); + bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); // Compute CSS style for this element CssStyle cssStyle; @@ -415,8 +413,8 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n // Check if any style state will change after we decrement depth // If so, we MUST flush the partWordBuffer with the CURRENT style first // Note: depth hasn't been decremented yet, so we check against (depth - 1) - const bool willPopStyleStack = !self->inlineStyleStack.empty() && - self->inlineStyleStack.back().depth == self->depth - 1; + const bool willPopStyleStack = + !self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1; const bool willClearBold = self->boldUntilDepth == self->depth - 1; const bool willClearItalic = self->italicUntilDepth == self->depth - 1; const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; @@ -426,10 +424,10 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n // Flush buffer with current style BEFORE any style changes if (self->partWordBufferIndex > 0) { // Flush if style will change OR if we're closing a block/structural element - const bool shouldFlush = styleWillChange || - 
matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || - matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || - matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1; + const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || + matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || + matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || self->depth == 1; if (shouldFlush) { // Use combined depth-based and CSS-based style From 750a6ee1d8ff5e1d438172b7fabe9e94ad552adc Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Mon, 19 Jan 2026 22:39:40 -0600 Subject: [PATCH 03/20] rerun clang-format --- lib/Epub/Epub/Section.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index d33cbb36..a9c5c282 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -186,8 +186,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c ChapterHtmlSlimParser visitor( tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, - [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, - progressFn, epub->getCssParser()); + [this, &lut](std::unique_ptr page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn, + epub->getCssParser()); Hyphenator::setPreferredLanguage(epub->getLanguage()); success = visitor.parseAndBuildPages(); From 5c9412b1419bf798a76ee07ff97a358f8d384789 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Mon, 19 Jan 2026 23:09:35 -0600 Subject: [PATCH 04/20] fix compilation errors --- lib/Epub/Epub/ParsedText.cpp | 3 ++- lib/Epub/Epub/ParsedText.h | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) 
diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 5e3f2988..6ae1896c 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -99,7 +99,8 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt); // Add CSS text-indent to first word width - if (isFirst && blockStyle.textIndent > 0 && shouldIndent) { + if (isFirst && blockStyle.textIndent > 0 && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && + !extraParagraphSpacing) { width += static_cast(blockStyle.textIndent); isFirst = false; } else { diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index fcc797a1..cc2596c5 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -36,9 +36,11 @@ class ParsedText { public: explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, - const bool hyphenationEnabled = false, - const BlockStyle& blockStyle = BlockStyle())) - : style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled, blockStyle(blockStyle)) {} + const bool hyphenationEnabled = false, const BlockStyle& blockStyle = BlockStyle()) + : style(style), + blockStyle(blockStyle), + extraParagraphSpacing(extraParagraphSpacing), + hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); From 8f3d226bf3af88fad24ed9c0e7564638d44c5c77 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 20 Jan 2026 10:27:55 -0600 Subject: [PATCH 05/20] increment versions to prevent error when opening cached EPUBs --- lib/Epub/Epub/BookMetadataCache.cpp | 2 +- lib/Epub/Epub/Section.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 790c368e..47ba227e 100644 --- 
a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -9,7 +9,7 @@ #include "FsHelpers.h" namespace { -constexpr uint8_t BOOK_CACHE_VERSION = 5; +constexpr uint8_t BOOK_CACHE_VERSION = 6; constexpr char bookBinFile[] = "/book.bin"; constexpr char tmpSpineBinFile[] = "/spine.bin.tmp"; constexpr char tmpTocBinFile[] = "/toc.bin.tmp"; diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp index a9c5c282..f346bd8d 100644 --- a/lib/Epub/Epub/Section.cpp +++ b/lib/Epub/Epub/Section.cpp @@ -8,7 +8,7 @@ #include "parsers/ChapterHtmlSlimParser.h" namespace { -constexpr uint8_t SECTION_FILE_VERSION = 10; +constexpr uint8_t SECTION_FILE_VERSION = 11; constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(uint32_t); From a41d0f04d50e570bc209817c6a05551c1794b532 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 27 Jan 2026 20:25:02 -0500 Subject: [PATCH 06/20] formatting: run clang-format-fix --- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 9fd570ba..719021a7 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -425,9 +425,8 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || - matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || - strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || - self->depth == 1; + matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 
0 || + matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; if (shouldFlush) { self->flushPartWordBuffer(); From 9dac5bf27e51712724657a3c619b9d5a0fdf1e42 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Thu, 29 Jan 2026 20:05:12 -0500 Subject: [PATCH 07/20] improve CSS margin, padding, and text-indent parsing - margin, padding, and text-indent now all support ems, rems, and px values - shorthand margin/padding CSS is also supported - margin/padding/indent values of 0 should no longer erroneously produce additional spacing --- lib/Epub/Epub/ParsedText.cpp | 68 ++++++++++++------- lib/Epub/Epub/blocks/BlockStyle.h | 20 ++++-- lib/Epub/Epub/blocks/TextBlock.cpp | 10 +++ lib/Epub/Epub/css/CssParser.cpp | 65 +++++++++++++----- lib/Epub/Epub/css/CssStyle.h | 52 +++++++++++--- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 31 ++++++--- 6 files changed, 182 insertions(+), 64 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 6ae1896c..708f90e8 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -93,20 +93,9 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere auto wordsIt = words.begin(); auto wordStylesIt = wordStyles.begin(); - bool isFirst = true; while (wordsIt != words.end()) { uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt); - - // Add CSS text-indent to first word width - if (isFirst && blockStyle.textIndent > 0 && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) && - !extraParagraphSpacing) { - width += static_cast(blockStyle.textIndent); - isFirst = false; - } else { - isFirst = false; - } - wordWidths.push_back(width); std::advance(wordsIt, 1); @@ -122,10 +111,18 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c return {}; } + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const int firstLineIndent = blockStyle.textIndent > 0 && 
!extraParagraphSpacing && + (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) + ? blockStyle.textIndent + : 0; + // Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation. for (size_t i = 0; i < wordWidths.size(); ++i) { - while (wordWidths[i] > pageWidth) { - if (!hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { + // First word needs to fit in reduced width if there's an indent + const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; + while (wordWidths[i] > effectiveWidth) { + if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { break; } } @@ -146,11 +143,14 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c int currlen = -spaceWidth; dp[i] = MAX_COST; + // First line has reduced width due to text-indent + const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth; + for (size_t j = i; j < totalWordCount; ++j) { // Current line length: previous width + space + current word width currlen += wordWidths[j] + spaceWidth; - if (currlen > pageWidth) { + if (currlen > effectivePageWidth) { break; } @@ -158,7 +158,7 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c if (j == totalWordCount - 1) { cost = 0; // Last line } else { - const int remainingSpace = pageWidth - currlen; + const int remainingSpace = effectivePageWidth - currlen; // Use long long for the square to prevent overflow const long long cost_ll = static_cast(remainingSpace) * remainingSpace + dp[j + 1]; @@ -213,10 +213,11 @@ void ParsedText::applyParagraphIndent() { return; } - if (blockStyle.textIndent > 0) { - // CSS text-indent is handled via first word width adjustment - // We'll add the indent value directly to the first word's width + if (blockStyle.textIndentDefined) { + // CSS text-indent is explicitly set (even if 0) - don't use fallback 
EmSpace + // The actual indent positioning is handled in extractLine() } else if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) { + // No CSS text-indent defined - use EmSpace fallback for visual indent words.front().insert(0, "\xe2\x80\x83"); } } @@ -225,13 +226,23 @@ void ParsedText::applyParagraphIndent() { std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId, const int pageWidth, const int spaceWidth, std::vector& wordWidths) { + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing && + (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) + ? blockStyle.textIndent + : 0; + std::vector lineBreakIndices; size_t currentIndex = 0; + bool isFirstLine = true; while (currentIndex < wordWidths.size()) { const size_t lineStart = currentIndex; int lineWidth = 0; + // First line has reduced width due to text-indent + const int effectivePageWidth = isFirstLine ? 
pageWidth - firstLineIndent : pageWidth; + // Consume as many words as possible for current line, splitting when prefixes fit while (currentIndex < wordWidths.size()) { const bool isFirstWord = currentIndex == lineStart; @@ -239,14 +250,14 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r const int candidateWidth = spacing + wordWidths[currentIndex]; // Word fits on current line - if (lineWidth + candidateWidth <= pageWidth) { + if (lineWidth + candidateWidth <= effectivePageWidth) { lineWidth += candidateWidth; ++currentIndex; continue; } // Word would overflow — try to split based on hyphenation points - const int availableWidth = pageWidth - lineWidth - spacing; + const int availableWidth = effectivePageWidth - lineWidth - spacing; const bool allowFallbackBreaks = isFirstWord; // Only for first word on line if (availableWidth > 0 && @@ -266,6 +277,7 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r } lineBreakIndices.push_back(currentIndex); + isFirstLine = false; } return lineBreakIndices; @@ -350,14 +362,22 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lineWordCount = lineBreak - lastBreakAt; + // Calculate first line indent (only for left/justified text without extra paragraph spacing) + const bool isFirstLine = breakIndex == 0; + const int firstLineIndent = isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && + (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) + ? 
blockStyle.textIndent + : 0; + // Calculate total word width for this line int lineWordWidthSum = 0; for (size_t i = lastBreakAt; i < lineBreak; i++) { lineWordWidthSum += wordWidths[i]; } - // Calculate spacing - const int spareSpace = pageWidth - lineWordWidthSum; + // Calculate spacing (account for indent reducing effective page width on first line) + const int effectivePageWidth = pageWidth - firstLineIndent; + const int spareSpace = effectivePageWidth - lineWordWidthSum; int spacing = spaceWidth; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; @@ -366,8 +386,8 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const spacing = spareSpace / (lineWordCount - 1); } - // Calculate initial x position - uint16_t xpos = 0; + // Calculate initial x position (first line starts at indent for left/justified text) + auto xpos = static_cast(firstLineIndent); if (style == TextBlock::RIGHT_ALIGN) { xpos = spareSpace - (lineWordCount - 1) * spaceWidth; } else if (style == TextBlock::CENTER_ALIGN) { diff --git a/lib/Epub/Epub/blocks/BlockStyle.h b/lib/Epub/Epub/blocks/BlockStyle.h index 2b073b63..fc40b6e1 100644 --- a/lib/Epub/Epub/blocks/BlockStyle.h +++ b/lib/Epub/Epub/blocks/BlockStyle.h @@ -9,9 +9,19 @@ * Padding is treated similarly to margins for rendering purposes. 
*/ struct BlockStyle { - int8_t marginTop = 0; // 0-2 lines - int8_t marginBottom = 0; // 0-2 lines - int8_t paddingTop = 0; // 0-2 lines (treated same as margin) - int8_t paddingBottom = 0; // 0-2 lines (treated same as margin) - int16_t textIndent = 0; // pixels + int16_t marginTop = 0; // pixels + int16_t marginBottom = 0; // pixels + int16_t marginLeft = 0; // pixels + int16_t marginRight = 0; // pixels + int16_t paddingTop = 0; // pixels (treated same as margin) + int16_t paddingBottom = 0; // pixels (treated same as margin) + int16_t paddingLeft = 0; // pixels (treated same as margin) + int16_t paddingRight = 0; // pixels (treated same as margin) + int16_t textIndent = 0; // pixels + bool textIndentDefined = false; // true if text-indent was explicitly set in CSS + + // Combined horizontal insets (margin + padding) + [[nodiscard]] int16_t leftInset() const { return marginLeft + paddingLeft; } + [[nodiscard]] int16_t rightInset() const { return marginRight + paddingRight; } + [[nodiscard]] int16_t totalHorizontalInset() const { return leftInset() + rightInset(); } }; diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 4fa7da75..3fd027fe 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -89,9 +89,14 @@ bool TextBlock::serialize(FsFile& file) const { // Block style (margins/padding/indent) serialization::writePod(file, blockStyle.marginTop); serialization::writePod(file, blockStyle.marginBottom); + serialization::writePod(file, blockStyle.marginLeft); + serialization::writePod(file, blockStyle.marginRight); serialization::writePod(file, blockStyle.paddingTop); serialization::writePod(file, blockStyle.paddingBottom); + serialization::writePod(file, blockStyle.paddingLeft); + serialization::writePod(file, blockStyle.paddingRight); serialization::writePod(file, blockStyle.textIndent); + serialization::writePod(file, blockStyle.textIndentDefined); return true; } @@ -141,9 +146,14 @@ 
std::unique_ptr TextBlock::deserialize(FsFile& file) { // Block style (margins/padding/indent) serialization::readPod(file, blockStyle.marginTop); serialization::readPod(file, blockStyle.marginBottom); + serialization::readPod(file, blockStyle.marginLeft); + serialization::readPod(file, blockStyle.marginRight); serialization::readPod(file, blockStyle.paddingTop); serialization::readPod(file, blockStyle.paddingBottom); + serialization::readPod(file, blockStyle.paddingLeft); + serialization::readPod(file, blockStyle.paddingRight); serialization::readPod(file, blockStyle.textIndent); + serialization::readPod(file, blockStyle.textIndentDefined); return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style, blockStyle, std::move(wordUnderlines))); diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index b62f0b57..679537e9 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -370,28 +370,57 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { style.indentPixels = interpretLength(propValue); style.defined.indent = 1; } else if (propName == "margin-top") { - const int8_t spacing = interpretSpacing(propValue); - if (spacing > 0) { - style.marginTop = spacing; - style.defined.marginTop = 1; - } + style.marginTop = static_cast(interpretLength(propValue)); + style.defined.marginTop = 1; } else if (propName == "margin-bottom") { - const int8_t spacing = interpretSpacing(propValue); - if (spacing > 0) { - style.marginBottom = spacing; - style.defined.marginBottom = 1; + style.marginBottom = static_cast(interpretLength(propValue)); + style.defined.marginBottom = 1; + } else if (propName == "margin-left") { + style.marginLeft = static_cast(interpretLength(propValue)); + style.defined.marginLeft = 1; + } else if (propName == "margin-right") { + style.marginRight = static_cast(interpretLength(propValue)); + style.defined.marginRight = 1; + } else if (propName 
== "margin") { + // Shorthand: 1-4 values for top, right, bottom, left + const auto values = splitWhitespace(propValue); + if (!values.empty()) { + const auto top = static_cast(interpretLength(values[0])); + const int16_t right = values.size() >= 2 ? static_cast(interpretLength(values[1])) : top; + const int16_t bottom = values.size() >= 3 ? static_cast(interpretLength(values[2])) : top; + const int16_t left = values.size() >= 4 ? static_cast(interpretLength(values[3])) : right; + style.marginTop = top; + style.marginRight = right; + style.marginBottom = bottom; + style.marginLeft = left; + style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; } } else if (propName == "padding-top") { - const int8_t spacing = interpretSpacing(propValue); - if (spacing > 0) { - style.paddingTop = spacing; - style.defined.paddingTop = 1; - } + style.paddingTop = static_cast(interpretLength(propValue)); + style.defined.paddingTop = 1; } else if (propName == "padding-bottom") { - const int8_t spacing = interpretSpacing(propValue); - if (spacing > 0) { - style.paddingBottom = spacing; - style.defined.paddingBottom = 1; + style.paddingBottom = static_cast(interpretLength(propValue)); + style.defined.paddingBottom = 1; + } else if (propName == "padding-left") { + style.paddingLeft = static_cast(interpretLength(propValue)); + style.defined.paddingLeft = 1; + } else if (propName == "padding-right") { + style.paddingRight = static_cast(interpretLength(propValue)); + style.defined.paddingRight = 1; + } else if (propName == "padding") { + // Shorthand: 1-4 values for top, right, bottom, left + const auto values = splitWhitespace(propValue); + if (!values.empty()) { + const auto top = static_cast(interpretLength(values[0])); + const int16_t right = values.size() >= 2 ? static_cast(interpretLength(values[1])) : top; + const int16_t bottom = values.size() >= 3 ? 
static_cast(interpretLength(values[2])) : top; + const int16_t left = values.size() >= 4 ? static_cast(interpretLength(values[3])) : right; + style.paddingTop = top; + style.paddingRight = right; + style.paddingBottom = bottom; + style.paddingLeft = left; + style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = + style.defined.paddingLeft = 1; } } } diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h index 83331617..b7f5e308 100644 --- a/lib/Epub/Epub/css/CssStyle.h +++ b/lib/Epub/Epub/css/CssStyle.h @@ -23,9 +23,13 @@ struct CssPropertyFlags { uint16_t indent : 1; uint16_t marginTop : 1; uint16_t marginBottom : 1; + uint16_t marginLeft : 1; + uint16_t marginRight : 1; uint16_t paddingTop : 1; uint16_t paddingBottom : 1; - uint16_t reserved : 7; + uint16_t paddingLeft : 1; + uint16_t paddingRight : 1; + uint16_t reserved : 3; CssPropertyFlags() : alignment(0), @@ -35,18 +39,23 @@ struct CssPropertyFlags { indent(0), marginTop(0), marginBottom(0), + marginLeft(0), + marginRight(0), paddingTop(0), paddingBottom(0), + paddingLeft(0), + paddingRight(0), reserved(0) {} [[nodiscard]] bool anySet() const { - return alignment || fontStyle || fontWeight || decoration || indent || marginTop || marginBottom || paddingTop || - paddingBottom; + return alignment || fontStyle || fontWeight || decoration || indent || marginTop || marginBottom || marginLeft || + marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight; } void clearAll() { alignment = fontStyle = fontWeight = decoration = indent = 0; - marginTop = marginBottom = paddingTop = paddingBottom = 0; + marginTop = marginBottom = marginLeft = marginRight = 0; + paddingTop = paddingBottom = paddingLeft = paddingRight = 0; } }; @@ -59,10 +68,14 @@ struct CssStyle { CssTextDecoration decoration = CssTextDecoration::None; float indentPixels = 0.0f; // First-line indent in pixels - int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2) - int8_t 
marginBottom = 0; // Vertical spacing after block (in lines, 0-2) - int8_t paddingTop = 0; // Padding before (in lines, 0-2) - int8_t paddingBottom = 0; // Padding after (in lines, 0-2) + int16_t marginTop = 0; // Vertical spacing before block (in pixels) + int16_t marginBottom = 0; // Vertical spacing after block (in pixels) + int16_t marginLeft = 0; // Horizontal spacing left of block (in pixels) + int16_t marginRight = 0; // Horizontal spacing right of block (in pixels) + int16_t paddingTop = 0; // Padding before (in pixels) + int16_t paddingBottom = 0; // Padding after (in pixels) + int16_t paddingLeft = 0; // Padding left (in pixels) + int16_t paddingRight = 0; // Padding right (in pixels) CssPropertyFlags defined; // Tracks which properties were explicitly set @@ -97,6 +110,14 @@ struct CssStyle { marginBottom = base.marginBottom; defined.marginBottom = 1; } + if (base.defined.marginLeft) { + marginLeft = base.marginLeft; + defined.marginLeft = 1; + } + if (base.defined.marginRight) { + marginRight = base.marginRight; + defined.marginRight = 1; + } if (base.defined.paddingTop) { paddingTop = base.paddingTop; defined.paddingTop = 1; @@ -105,6 +126,14 @@ struct CssStyle { paddingBottom = base.paddingBottom; defined.paddingBottom = 1; } + if (base.defined.paddingLeft) { + paddingLeft = base.paddingLeft; + defined.paddingLeft = 1; + } + if (base.defined.paddingRight) { + paddingRight = base.paddingRight; + defined.paddingRight = 1; + } } // Compatibility accessors for existing code that uses hasX pattern @@ -115,8 +144,12 @@ struct CssStyle { [[nodiscard]] bool hasTextIndent() const { return defined.indent; } [[nodiscard]] bool hasMarginTop() const { return defined.marginTop; } [[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; } + [[nodiscard]] bool hasMarginLeft() const { return defined.marginLeft; } + [[nodiscard]] bool hasMarginRight() const { return defined.marginRight; } [[nodiscard]] bool hasPaddingTop() const { return 
defined.paddingTop; } [[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; } + [[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; } + [[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; } // Merge another style (alias for applyOver for compatibility) void merge(const CssStyle& other) { applyOver(other); } @@ -127,7 +160,8 @@ struct CssStyle { fontWeight = CssFontWeight::Normal; decoration = CssTextDecoration::None; indentPixels = 0.0f; - marginTop = marginBottom = paddingTop = paddingBottom = 0; + marginTop = marginBottom = marginLeft = marginRight = 0; + paddingTop = paddingBottom = paddingLeft = paddingRight = 0; defined.clearAll(); } }; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 719021a7..0ac10b41 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -46,11 +46,19 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib // Create a BlockStyle from CSS style properties BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) { BlockStyle blockStyle; - blockStyle.marginTop = static_cast(cssStyle.marginTop + cssStyle.paddingTop); - blockStyle.marginBottom = static_cast(cssStyle.marginBottom + cssStyle.paddingBottom); + // Vertical: combine margin and padding for top/bottom spacing + blockStyle.marginTop = static_cast(cssStyle.marginTop + cssStyle.paddingTop); + blockStyle.marginBottom = static_cast(cssStyle.marginBottom + cssStyle.paddingBottom); blockStyle.paddingTop = cssStyle.paddingTop; blockStyle.paddingBottom = cssStyle.paddingBottom; + // Horizontal: store margin and padding separately for layout calculations + blockStyle.marginLeft = cssStyle.marginLeft; + blockStyle.marginRight = cssStyle.marginRight; + blockStyle.paddingLeft = cssStyle.paddingLeft; + blockStyle.paddingRight = cssStyle.paddingRight; + // Text indent 
blockStyle.textIndent = static_cast(cssStyle.indentPixels); + blockStyle.textIndentDefined = cssStyle.defined.indent; return blockStyle; } @@ -570,7 +578,9 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr line) { currentPageNextY = 0; } - currentPage->elements.push_back(std::make_shared(line, 0, currentPageNextY)); + // Apply horizontal left inset (margin + padding) as x position offset + const int16_t xOffset = line->getBlockStyle().leftInset(); + currentPage->elements.push_back(std::make_shared(line, xOffset, currentPageNextY)); currentPageNextY += lineHeight; } @@ -587,19 +597,24 @@ void ChapterHtmlSlimParser::makePages() { const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; - // Apply marginTop before the paragraph + // Apply marginTop before the paragraph (stored in pixels) const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); if (blockStyle.marginTop > 0) { - currentPageNextY += lineHeight * blockStyle.marginTop; + currentPageNextY += blockStyle.marginTop; } + // Calculate effective width accounting for horizontal margins/padding + const int horizontalInset = blockStyle.totalHorizontalInset(); + const uint16_t effectiveWidth = + (horizontalInset < viewportWidth) ? 
static_cast(viewportWidth - horizontalInset) : viewportWidth; + currentTextBlock->layoutAndExtractLines( - renderer, fontId, viewportWidth, + renderer, fontId, effectiveWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Apply marginBottom after the paragraph + // Apply marginBottom after the paragraph (stored in pixels) if (blockStyle.marginBottom > 0) { - currentPageNextY += lineHeight * blockStyle.marginBottom; + currentPageNextY += blockStyle.marginBottom; } // Extra paragraph spacing if enabled (default behavior) From 6796989247aa1754c076977fb663e2959ed400a7 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 31 Jan 2026 14:19:28 -0500 Subject: [PATCH 08/20] calculate em based on font line height --- lib/Epub/Epub/css/CssParser.cpp | 66 +++++++++---------- lib/Epub/Epub/css/CssParser.h | 2 +- lib/Epub/Epub/css/CssStyle.h | 58 ++++++++++++---- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 33 ++++++---- 4 files changed, 99 insertions(+), 60 deletions(-) diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index 679537e9..8a17f4b8 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -253,15 +253,12 @@ CssTextDecoration CssParser::interpretDecoration(const std::string& val) { return CssTextDecoration::None; } -float CssParser::interpretLength(const std::string& val, const float emSize) { +CssLength CssParser::interpretLength(const std::string& val) { const std::string v = normalized(val); - if (v.empty()) return 0.0f; - - // Determine unit and multiplier - float multiplier = 1.0f; - size_t unitStart = v.size(); + if (v.empty()) return CssLength{}; // Find where the number ends + size_t unitStart = v.size(); for (size_t i = 0; i < v.size(); ++i) { const char c = v[i]; if (!std::isdigit(c) && c != '.' 
&& c != '-' && c != '+') { @@ -273,20 +270,23 @@ float CssParser::interpretLength(const std::string& val, const float emSize) { const std::string numPart = v.substr(0, unitStart); const std::string unitPart = v.substr(unitStart); - // Handle units - if (unitPart == "em" || unitPart == "rem") { - multiplier = emSize; - } else if (unitPart == "pt") { - multiplier = 1.33f; // Approximate pt to px conversion - } - // px is default (multiplier = 1.0) - + // Parse numeric value char* endPtr = nullptr; const float numericValue = std::strtof(numPart.c_str(), &endPtr); + if (endPtr == numPart.c_str()) return CssLength{}; // No number parsed - if (endPtr == numPart.c_str()) return 0.0f; // No number parsed + // Determine unit type (preserve for deferred resolution) + auto unit = CssUnit::Pixels; + if (unitPart == "em") { + unit = CssUnit::Em; + } else if (unitPart == "rem") { + unit = CssUnit::Rem; + } else if (unitPart == "pt") { + unit = CssUnit::Points; + } + // px and unitless default to Pixels - return numericValue * multiplier; + return CssLength{numericValue, unit}; } int8_t CssParser::interpretSpacing(const std::string& val) { @@ -367,28 +367,28 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { style.decoration = interpretDecoration(propValue); style.defined.decoration = 1; } else if (propName == "text-indent") { - style.indentPixels = interpretLength(propValue); + style.indent = interpretLength(propValue); style.defined.indent = 1; } else if (propName == "margin-top") { - style.marginTop = static_cast(interpretLength(propValue)); + style.marginTop = interpretLength(propValue); style.defined.marginTop = 1; } else if (propName == "margin-bottom") { - style.marginBottom = static_cast(interpretLength(propValue)); + style.marginBottom = interpretLength(propValue); style.defined.marginBottom = 1; } else if (propName == "margin-left") { - style.marginLeft = static_cast(interpretLength(propValue)); + style.marginLeft = interpretLength(propValue); 
style.defined.marginLeft = 1; } else if (propName == "margin-right") { - style.marginRight = static_cast(interpretLength(propValue)); + style.marginRight = interpretLength(propValue); style.defined.marginRight = 1; } else if (propName == "margin") { // Shorthand: 1-4 values for top, right, bottom, left const auto values = splitWhitespace(propValue); if (!values.empty()) { - const auto top = static_cast(interpretLength(values[0])); - const int16_t right = values.size() >= 2 ? static_cast(interpretLength(values[1])) : top; - const int16_t bottom = values.size() >= 3 ? static_cast(interpretLength(values[2])) : top; - const int16_t left = values.size() >= 4 ? static_cast(interpretLength(values[3])) : right; + const CssLength top = interpretLength(values[0]); + const CssLength right = values.size() >= 2 ? interpretLength(values[1]) : top; + const CssLength bottom = values.size() >= 3 ? interpretLength(values[2]) : top; + const CssLength left = values.size() >= 4 ? interpretLength(values[3]) : right; style.marginTop = top; style.marginRight = right; style.marginBottom = bottom; @@ -396,25 +396,25 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; } } else if (propName == "padding-top") { - style.paddingTop = static_cast(interpretLength(propValue)); + style.paddingTop = interpretLength(propValue); style.defined.paddingTop = 1; } else if (propName == "padding-bottom") { - style.paddingBottom = static_cast(interpretLength(propValue)); + style.paddingBottom = interpretLength(propValue); style.defined.paddingBottom = 1; } else if (propName == "padding-left") { - style.paddingLeft = static_cast(interpretLength(propValue)); + style.paddingLeft = interpretLength(propValue); style.defined.paddingLeft = 1; } else if (propName == "padding-right") { - style.paddingRight = static_cast(interpretLength(propValue)); + style.paddingRight = 
interpretLength(propValue); style.defined.paddingRight = 1; } else if (propName == "padding") { // Shorthand: 1-4 values for top, right, bottom, left const auto values = splitWhitespace(propValue); if (!values.empty()) { - const auto top = static_cast(interpretLength(values[0])); - const int16_t right = values.size() >= 2 ? static_cast(interpretLength(values[1])) : top; - const int16_t bottom = values.size() >= 3 ? static_cast(interpretLength(values[2])) : top; - const int16_t left = values.size() >= 4 ? static_cast(interpretLength(values[3])) : right; + const CssLength top = interpretLength(values[0]); + const CssLength right = values.size() >= 2 ? interpretLength(values[1]) : top; + const CssLength bottom = values.size() >= 3 ? interpretLength(values[2]) : top; + const CssLength left = values.size() >= 4 ? interpretLength(values[3]) : right; style.paddingTop = top; style.paddingRight = right; style.paddingBottom = bottom; diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h index a1802369..7a847b35 100644 --- a/lib/Epub/Epub/css/CssParser.h +++ b/lib/Epub/Epub/css/CssParser.h @@ -89,7 +89,7 @@ class CssParser { static CssFontStyle interpretFontStyle(const std::string& val); static CssFontWeight interpretFontWeight(const std::string& val); static CssTextDecoration interpretDecoration(const std::string& val); - static float interpretLength(const std::string& val, float emSize = 16.0f); + static CssLength interpretLength(const std::string& val); static int8_t interpretSpacing(const std::string& val); // String utilities diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h index b7f5e308..4b3b063f 100644 --- a/lib/Epub/Epub/css/CssStyle.h +++ b/lib/Epub/Epub/css/CssStyle.h @@ -5,6 +5,37 @@ // Text alignment options matching CSS text-align property enum class TextAlign : uint8_t { None = 0, Left = 1, Right = 2, Center = 3, Justify = 4 }; +// CSS length unit types +enum class CssUnit : uint8_t { Pixels = 0, Em = 1, Rem = 2, 
Points = 3 }; + +// Represents a CSS length value with its unit, allowing deferred resolution to pixels +struct CssLength { + float value = 0.0f; + CssUnit unit = CssUnit::Pixels; + + CssLength() = default; + CssLength(const float v, const CssUnit u) : value(v), unit(u) {} + + // Convenience constructor for pixel values (most common case) + explicit CssLength(const float pixels) : value(pixels) {} + + // Resolve to pixels given the current em size (font line height) + [[nodiscard]] float toPixels(const float emSize) const { + switch (unit) { + case CssUnit::Em: + case CssUnit::Rem: + return value * emSize; + case CssUnit::Points: + return value * 1.33f; // Approximate pt to px conversion + default: + return value; + } + } + + // Resolve to int16_t pixels (for BlockStyle fields) + [[nodiscard]] int16_t toPixelsInt16(const float emSize) const { return static_cast(toPixels(emSize)); } +}; + // Font style options matching CSS font-style property enum class CssFontStyle : uint8_t { Normal = 0, Italic = 1 }; @@ -61,21 +92,22 @@ struct CssPropertyFlags { // Represents a collection of CSS style properties // Only stores properties relevant to e-ink text rendering +// Length values are stored as CssLength (value + unit) for deferred resolution struct CssStyle { TextAlign alignment = TextAlign::None; CssFontStyle fontStyle = CssFontStyle::Normal; CssFontWeight fontWeight = CssFontWeight::Normal; CssTextDecoration decoration = CssTextDecoration::None; - float indentPixels = 0.0f; // First-line indent in pixels - int16_t marginTop = 0; // Vertical spacing before block (in pixels) - int16_t marginBottom = 0; // Vertical spacing after block (in pixels) - int16_t marginLeft = 0; // Horizontal spacing left of block (in pixels) - int16_t marginRight = 0; // Horizontal spacing right of block (in pixels) - int16_t paddingTop = 0; // Padding before (in pixels) - int16_t paddingBottom = 0; // Padding after (in pixels) - int16_t paddingLeft = 0; // Padding left (in pixels) - int16_t 
paddingRight = 0; // Padding right (in pixels) + CssLength indent; // First-line indent (deferred resolution) + CssLength marginTop; // Vertical spacing before block + CssLength marginBottom; // Vertical spacing after block + CssLength marginLeft; // Horizontal spacing left of block + CssLength marginRight; // Horizontal spacing right of block + CssLength paddingTop; // Padding before + CssLength paddingBottom; // Padding after + CssLength paddingLeft; // Padding left + CssLength paddingRight; // Padding right CssPropertyFlags defined; // Tracks which properties were explicitly set @@ -99,7 +131,7 @@ struct CssStyle { defined.decoration = 1; } if (base.defined.indent) { - indentPixels = base.indentPixels; + indent = base.indent; defined.indent = 1; } if (base.defined.marginTop) { @@ -159,9 +191,9 @@ struct CssStyle { fontStyle = CssFontStyle::Normal; fontWeight = CssFontWeight::Normal; decoration = CssTextDecoration::None; - indentPixels = 0.0f; - marginTop = marginBottom = marginLeft = marginRight = 0; - paddingTop = paddingBottom = paddingLeft = paddingRight = 0; + indent = CssLength{}; + marginTop = marginBottom = marginLeft = marginRight = CssLength{}; + paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{}; defined.clearAll(); } }; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 0ac10b41..13325df1 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -43,21 +43,28 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } -// Create a BlockStyle from CSS style properties -BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) { +// Create a BlockStyle from CSS style properties, resolving CssLength values to pixels +// emSize is the current font line height, used for em/rem unit conversion +BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle, const float 
emSize) { BlockStyle blockStyle; + // Resolve all CssLength values to pixels using the current font's em size + const int16_t marginTopPx = cssStyle.marginTop.toPixelsInt16(emSize); + const int16_t marginBottomPx = cssStyle.marginBottom.toPixelsInt16(emSize); + const int16_t paddingTopPx = cssStyle.paddingTop.toPixelsInt16(emSize); + const int16_t paddingBottomPx = cssStyle.paddingBottom.toPixelsInt16(emSize); + // Vertical: combine margin and padding for top/bottom spacing - blockStyle.marginTop = static_cast(cssStyle.marginTop + cssStyle.paddingTop); - blockStyle.marginBottom = static_cast(cssStyle.marginBottom + cssStyle.paddingBottom); - blockStyle.paddingTop = cssStyle.paddingTop; - blockStyle.paddingBottom = cssStyle.paddingBottom; + blockStyle.marginTop = static_cast(marginTopPx + paddingTopPx); + blockStyle.marginBottom = static_cast(marginBottomPx + paddingBottomPx); + blockStyle.paddingTop = paddingTopPx; + blockStyle.paddingBottom = paddingBottomPx; // Horizontal: store margin and padding separately for layout calculations - blockStyle.marginLeft = cssStyle.marginLeft; - blockStyle.marginRight = cssStyle.marginRight; - blockStyle.paddingLeft = cssStyle.paddingLeft; - blockStyle.paddingRight = cssStyle.paddingRight; + blockStyle.marginLeft = cssStyle.marginLeft.toPixelsInt16(emSize); + blockStyle.marginRight = cssStyle.marginRight.toPixelsInt16(emSize); + blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); + blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); // Text indent - blockStyle.textIndent = static_cast(cssStyle.indentPixels); + blockStyle.textIndent = cssStyle.indent.toPixelsInt16(emSize); blockStyle.textIndentDefined = cssStyle.defined.indent; return blockStyle; } @@ -244,7 +251,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } self->currentBlockStyle = cssStyle; - self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle)); + self->startNewTextBlock(alignment, 
createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId))); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); self->updateEffectiveInlineStyle(); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { @@ -277,7 +284,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } self->currentBlockStyle = cssStyle; - self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle)); + self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId))); self->updateEffectiveInlineStyle(); if (strcmp(name, "li") == 0) { From a6d6e5e770b02c799917a4a812d0f08e1164ffc1 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 31 Jan 2026 14:46:20 -0500 Subject: [PATCH 09/20] fix some styling edge cases: preserving spacing from parent elements, removing stray spaces after italicized text in child elements --- lib/Epub/Epub/ParsedText.cpp | 61 ++++++++++++++++--- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 33 +++++++++- 2 files changed, 83 insertions(+), 11 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 708f90e8..25f7e5ed 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -19,6 +19,23 @@ namespace { constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD"; constexpr size_t SOFT_HYPHEN_BYTES = 2; +// Check if a character is punctuation that should attach to the previous word +// (no space before it). Limited to sentence-ending and clause-separating punctuation +// to avoid false positives with decorative brackets like "[ 1 ]". +bool isAttachingPunctuation(const char c) { + return c == '.' || c == ',' || c == '!' || c == '?' 
|| c == ';' || c == ':'; +} + +// Check if a word consists entirely of punctuation that should attach to the previous word +bool isAttachingPunctuationWord(const std::string& word) { + if (word.empty()) return false; + // Check if word starts with attaching punctuation and is short (to avoid false positives) + if (isAttachingPunctuation(word[0]) && word.size() <= 3) { + return true; + } + return false; +} + bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; } // Removes every soft hyphen in-place so rendered glyphs match measured widths. @@ -369,10 +386,20 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const ? blockStyle.textIndent : 0; - // Calculate total word width for this line + // Calculate total word width for this line and count actual word gaps + // (punctuation that attaches to previous word doesn't count as a gap) + // Note: words list starts at the beginning because previous lines were spliced out int lineWordWidthSum = 0; - for (size_t i = lastBreakAt; i < lineBreak; i++) { - lineWordWidthSum += wordWidths[i]; + size_t actualGapCount = 0; + auto countWordIt = words.begin(); + + for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { + lineWordWidthSum += wordWidths[lastBreakAt + wordIdx]; + // Count gaps: each word after the first creates a gap, unless it's attaching punctuation + if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) { + actualGapCount++; + } + ++countWordIt; } // Calculate spacing (account for indent reducing effective page width on first line) @@ -382,24 +409,38 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spacing = spaceWidth; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { - spacing = spareSpace / (lineWordCount - 1); + // For justified text, calculate spacing based on actual gap count + if 
(style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) { + spacing = spareSpace / static_cast(actualGapCount); } // Calculate initial x position (first line starts at indent for left/justified text) auto xpos = static_cast(firstLineIndent); if (style == TextBlock::RIGHT_ALIGN) { - xpos = spareSpace - (lineWordCount - 1) * spaceWidth; + xpos = spareSpace - static_cast(actualGapCount) * spaceWidth; } else if (style == TextBlock::CENTER_ALIGN) { - xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; + xpos = (spareSpace - static_cast(actualGapCount) * spaceWidth) / 2; } // Pre-calculate X positions for words + // Punctuation that attaches to the previous word doesn't get space before it + // Note: words list starts at the beginning because previous lines were spliced out std::list lineXPos; - for (size_t i = lastBreakAt; i < lineBreak; i++) { - const uint16_t currentWordWidth = wordWidths[i]; + auto wordIt = words.begin(); + + for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { + const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx]; + lineXPos.push_back(xpos); - xpos += currentWordWidth + spacing; + + // Add spacing after this word, unless the next word is attaching punctuation + auto nextWordIt = wordIt; + ++nextWordIt; + const bool nextIsAttachingPunctuation = + wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt); + + xpos += currentWordWidth + (nextIsAttachingPunctuation ? 
0 : spacing); + ++wordIt; } // Iterators always start at the beginning as we are moving content with splice below diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 13325df1..991a8c42 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -113,13 +113,44 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() { partWordBufferIndex = 0; } +// Merge block styles for nested block elements +// When a child block element is inside a parent with no direct text content, +// we accumulate their margins so nested containers properly contribute spacing +BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) { + BlockStyle merged; + // Vertical margins: sum them (nested blocks create additive spacing) + merged.marginTop = static_cast(parent.marginTop + child.marginTop); + merged.marginBottom = static_cast(parent.marginBottom + child.marginBottom); + // Horizontal margins: sum them (nested blocks create cumulative indentation) + merged.marginLeft = static_cast(parent.marginLeft + child.marginLeft); + merged.marginRight = static_cast(parent.marginRight + child.marginRight); + // Padding: sum them + merged.paddingTop = static_cast(parent.paddingTop + child.paddingTop); + merged.paddingBottom = static_cast(parent.paddingBottom + child.paddingBottom); + merged.paddingLeft = static_cast(parent.paddingLeft + child.paddingLeft); + merged.paddingRight = static_cast(parent.paddingRight + child.paddingRight); + // Text indent: use child's if defined, otherwise inherit parent's + if (child.textIndentDefined) { + merged.textIndent = child.textIndent; + merged.textIndentDefined = true; + } else if (parent.textIndentDefined) { + merged.textIndent = parent.textIndent; + merged.textIndentDefined = true; + } + return merged; +} + // start a new text block if needed void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const 
BlockStyle& blockStyle) { if (currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { currentTextBlock->setStyle(style); - currentTextBlock->setBlockStyle(blockStyle); + // Merge with existing block style to accumulate margins from parent block elements + // This handles cases like

<div style="margin-bottom:2em"><h1>text</h1></div>

where the + // div's margin should be preserved even though it has no direct text content + const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle); + currentTextBlock->setBlockStyle(merged); return; } From 394fc418195ef89f03147bd0a1b11aead472d647 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 31 Jan 2026 15:11:22 -0500 Subject: [PATCH 10/20] add quotes to punctuation list --- lib/Epub/Epub/ParsedText.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 25f7e5ed..8e38e63b 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -20,10 +20,11 @@ constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD"; constexpr size_t SOFT_HYPHEN_BYTES = 2; // Check if a character is punctuation that should attach to the previous word -// (no space before it). Limited to sentence-ending and clause-separating punctuation -// to avoid false positives with decorative brackets like "[ 1 ]". +// (no space before it). Includes sentence punctuation and closing quotes. +// Excludes brackets/parens to avoid false positives with decorative patterns like "[ 1 ]". bool isAttachingPunctuation(const char c) { - return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':'; + return c == '.' || c == ',' || c == '!' || c == '?' 
|| c == ';' || c == ':' || + c == '"' || c == '\''; } // Check if a word consists entirely of punctuation that should attach to the previous word From d445eb0bb094f89c83adc6040a18681f9b98f3c2 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Sat, 31 Jan 2026 15:14:51 -0500 Subject: [PATCH 11/20] fix formatting --- lib/Epub/Epub/ParsedText.cpp | 6 ++---- lib/Epub/Epub/css/CssStyle.h | 18 +++++++++--------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 8e38e63b..63648f04 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -23,8 +23,7 @@ constexpr size_t SOFT_HYPHEN_BYTES = 2; // (no space before it). Includes sentence punctuation and closing quotes. // Excludes brackets/parens to avoid false positives with decorative patterns like "[ 1 ]". bool isAttachingPunctuation(const char c) { - return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':' || - c == '"' || c == '\''; + return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':' || c == '"' || c == '\''; } // Check if a word consists entirely of punctuation that should attach to the previous word @@ -437,8 +436,7 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const // Add spacing after this word, unless the next word is attaching punctuation auto nextWordIt = wordIt; ++nextWordIt; - const bool nextIsAttachingPunctuation = - wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt); + const bool nextIsAttachingPunctuation = wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt); xpos += currentWordWidth + (nextIsAttachingPunctuation ? 
0 : spacing); ++wordIt; diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h index 4b3b063f..1bf0fba0 100644 --- a/lib/Epub/Epub/css/CssStyle.h +++ b/lib/Epub/Epub/css/CssStyle.h @@ -99,15 +99,15 @@ struct CssStyle { CssFontWeight fontWeight = CssFontWeight::Normal; CssTextDecoration decoration = CssTextDecoration::None; - CssLength indent; // First-line indent (deferred resolution) - CssLength marginTop; // Vertical spacing before block - CssLength marginBottom; // Vertical spacing after block - CssLength marginLeft; // Horizontal spacing left of block - CssLength marginRight; // Horizontal spacing right of block - CssLength paddingTop; // Padding before - CssLength paddingBottom; // Padding after - CssLength paddingLeft; // Padding left - CssLength paddingRight; // Padding right + CssLength indent; // First-line indent (deferred resolution) + CssLength marginTop; // Vertical spacing before block + CssLength marginBottom; // Vertical spacing after block + CssLength marginLeft; // Horizontal spacing left of block + CssLength marginRight; // Horizontal spacing right of block + CssLength paddingTop; // Padding before + CssLength paddingBottom; // Padding after + CssLength paddingLeft; // Padding left + CssLength paddingRight; // Padding right CssPropertyFlags defined; // Tracks which properties were explicitly set From 9d58952ba74701950de4b2be29a1f55a705a03e5 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:39:25 -0500 Subject: [PATCH 12/20] refactor: rename getIndentWidth to getTextAdvanceX The function measures the advance width of arbitrary text (specifically em-space prefix), not just indentation. getTextAdvanceX better reflects its actual purpose. 
--- lib/Epub/Epub/blocks/TextBlock.cpp | 2 +- lib/GfxRenderer/GfxRenderer.cpp | 2 +- lib/GfxRenderer/GfxRenderer.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 3fd027fe..724471b6 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -33,7 +33,7 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int if (w.size() >= 3 && static_cast(w[0]) == 0xE2 && static_cast(w[1]) == 0x80 && static_cast(w[2]) == 0x83) { const char* visiblePtr = w.c_str() + 3; - const int prefixWidth = renderer.getIndentWidth(fontId, std::string("\xe2\x80\x83").c_str()); + const int prefixWidth = renderer.getTextAdvanceX(fontId, std::string("\xe2\x80\x83").c_str()); const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt); startX = wordX + prefixWidth; underlineWidth = visibleWidth; diff --git a/lib/GfxRenderer/GfxRenderer.cpp b/lib/GfxRenderer/GfxRenderer.cpp index a7019230..40caf18c 100644 --- a/lib/GfxRenderer/GfxRenderer.cpp +++ b/lib/GfxRenderer/GfxRenderer.cpp @@ -470,7 +470,7 @@ int GfxRenderer::getSpaceWidth(const int fontId) const { return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX; } -int GfxRenderer::getIndentWidth(const int fontId, const char* text) const { +int GfxRenderer::getTextAdvanceX(const int fontId, const char* text) const { if (fontMap.count(fontId) == 0) { Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); return 0; diff --git a/lib/GfxRenderer/GfxRenderer.h b/lib/GfxRenderer/GfxRenderer.h index 28c6e475..66d625f5 100644 --- a/lib/GfxRenderer/GfxRenderer.h +++ b/lib/GfxRenderer/GfxRenderer.h @@ -78,7 +78,7 @@ class GfxRenderer { void drawText(int fontId, int x, int y, const char* text, bool black = true, EpdFontFamily::Style style = EpdFontFamily::REGULAR) const; int getSpaceWidth(int fontId) const; - int getIndentWidth(int fontId, const char* 
text) const; + int getTextAdvanceX(int fontId, const char* text) const; int getFontAscenderSize(int fontId) const; int getLineHeight(int fontId) const; std::string truncatedText(int fontId, const char* text, int maxWidth, From 53931b3693febc31e5ad62556361196322950ebd Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:39:58 -0500 Subject: [PATCH 13/20] feat: add UNDERLINE bitflag to EpdFontFamily::Style Convert Style enum to true bitflags by adding UNDERLINE=4. Update getFont() to use bitwise operations instead of equality checks, allowing styles like BOLD|UNDERLINE to work correctly. This is preparation for encoding underline state directly in the Style rather than tracking it separately. --- lib/EpdFont/EpdFontFamily.cpp | 24 ++++++++++-------------- lib/EpdFont/EpdFontFamily.h | 2 +- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/lib/EpdFont/EpdFontFamily.cpp b/lib/EpdFont/EpdFontFamily.cpp index 74a6677f..821153e3 100644 --- a/lib/EpdFont/EpdFontFamily.cpp +++ b/lib/EpdFont/EpdFontFamily.cpp @@ -1,23 +1,19 @@ #include "EpdFontFamily.h" const EpdFont* EpdFontFamily::getFont(const Style style) const { - if (style == BOLD && bold) { + // Extract font style bits (ignore UNDERLINE bit for font selection) + const bool hasBold = (style & BOLD) != 0; + const bool hasItalic = (style & ITALIC) != 0; + + if (hasBold && hasItalic) { + if (boldItalic) return boldItalic; + if (bold) return bold; + if (italic) return italic; + } else if (hasBold && bold) { return bold; - } - if (style == ITALIC && italic) { + } else if (hasItalic && italic) { return italic; } - if (style == BOLD_ITALIC) { - if (boldItalic) { - return boldItalic; - } - if (bold) { - return bold; - } - if (italic) { - return italic; - } - } return regular; } diff --git a/lib/EpdFont/EpdFontFamily.h b/lib/EpdFont/EpdFontFamily.h index 92043d1f..64fd9953 100644 --- a/lib/EpdFont/EpdFontFamily.h +++ b/lib/EpdFont/EpdFontFamily.h @@ -3,7 +3,7 @@ class EpdFontFamily { public: - 
enum Style : uint8_t { REGULAR = 0, BOLD = 1, ITALIC = 2, BOLD_ITALIC = 3 }; + enum Style : uint8_t { REGULAR = 0, BOLD = 1, ITALIC = 2, BOLD_ITALIC = 3, UNDERLINE = 4 }; explicit EpdFontFamily(const EpdFont* regular, const EpdFont* bold = nullptr, const EpdFont* italic = nullptr, const EpdFont* boldItalic = nullptr) From 0fa4bb410082c586fb390238229117a36fc59e14 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:41:54 -0500 Subject: [PATCH 14/20] refactor: rename CssStyle properties to match CSS naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TextAlign enum → CssTextAlign (reordered to match settings: Justify=0, Left=1, Center=2, Right=3) - alignment → textAlign - indent → textIndent - decoration → textDecoration - Update CssPropertyFlags field names to match - Remove TextAlign::None; default to CssTextAlign::Left This aligns internal naming with actual CSS property names for clarity. --- lib/Epub/Epub/css/CssParser.cpp | 27 +++--- lib/Epub/Epub/css/CssParser.h | 2 +- lib/Epub/Epub/css/CssStyle.h | 88 +++++++++---------- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 34 +++---- 4 files changed, 70 insertions(+), 81 deletions(-) diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index 8a17f4b8..afcf7e95 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -204,15 +204,15 @@ std::vector CssParser::splitWhitespace(const std::string& s) { // Property value interpreters -TextAlign CssParser::interpretAlignment(const std::string& val) { +CssTextAlign CssParser::interpretAlignment(const std::string& val) { const std::string v = normalized(val); - if (v == "left" || v == "start") return TextAlign::Left; - if (v == "right" || v == "end") return TextAlign::Right; - if (v == "center") return TextAlign::Center; - if (v == "justify") return TextAlign::Justify; + if (v == "left" || v == "start") return CssTextAlign::Left; + if (v == "right" || v == 
"end") return CssTextAlign::Right; + if (v == "center") return CssTextAlign::Center; + if (v == "justify") return CssTextAlign::Justify; - return TextAlign::None; + return CssTextAlign::Left; } CssFontStyle CssParser::interpretFontStyle(const std::string& val) { @@ -352,11 +352,8 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { // Match property and set value if (propName == "text-align") { - const TextAlign align = interpretAlignment(propValue); - if (align != TextAlign::None) { - style.alignment = align; - style.defined.alignment = 1; - } + style.textAlign = interpretAlignment(propValue); + style.defined.textAlign = 1; } else if (propName == "font-style") { style.fontStyle = interpretFontStyle(propValue); style.defined.fontStyle = 1; @@ -364,11 +361,11 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { style.fontWeight = interpretFontWeight(propValue); style.defined.fontWeight = 1; } else if (propName == "text-decoration" || propName == "text-decoration-line") { - style.decoration = interpretDecoration(propValue); - style.defined.decoration = 1; + style.textDecoration = interpretDecoration(propValue); + style.defined.textDecoration = 1; } else if (propName == "text-indent") { - style.indent = interpretLength(propValue); - style.defined.indent = 1; + style.textIndent = interpretLength(propValue); + style.defined.textIndent = 1; } else if (propName == "margin-top") { style.marginTop = interpretLength(propValue); style.defined.marginTop = 1; diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h index 7a847b35..9915485d 100644 --- a/lib/Epub/Epub/css/CssParser.h +++ b/lib/Epub/Epub/css/CssParser.h @@ -85,7 +85,7 @@ class CssParser { static CssStyle parseDeclarations(const std::string& declBlock); // Individual property value parsers - static TextAlign interpretAlignment(const std::string& val); + static CssTextAlign interpretAlignment(const std::string& val); static CssFontStyle 
interpretFontStyle(const std::string& val); static CssFontWeight interpretFontWeight(const std::string& val); static CssTextDecoration interpretDecoration(const std::string& val); diff --git a/lib/Epub/Epub/css/CssStyle.h b/lib/Epub/Epub/css/CssStyle.h index 1bf0fba0..7b83da3f 100644 --- a/lib/Epub/Epub/css/CssStyle.h +++ b/lib/Epub/Epub/css/CssStyle.h @@ -2,10 +2,8 @@ #include -// Text alignment options matching CSS text-align property -enum class TextAlign : uint8_t { None = 0, Left = 1, Right = 2, Center = 3, Justify = 4 }; - -// CSS length unit types +// Matches order of PARAGRAPH_ALIGNMENT in CrossPointSettings +enum class CssTextAlign : uint8_t { Justify = 0, Left = 1, Center = 2, Right = 3 }; enum class CssUnit : uint8_t { Pixels = 0, Em = 1, Rem = 2, Points = 3 }; // Represents a CSS length value with its unit, allowing deferred resolution to pixels @@ -47,11 +45,11 @@ enum class CssTextDecoration : uint8_t { None = 0, Underline = 1 }; // Bitmask for tracking which properties have been explicitly set struct CssPropertyFlags { - uint16_t alignment : 1; + uint16_t textAlign : 1; uint16_t fontStyle : 1; uint16_t fontWeight : 1; - uint16_t decoration : 1; - uint16_t indent : 1; + uint16_t textDecoration : 1; + uint16_t textIndent : 1; uint16_t marginTop : 1; uint16_t marginBottom : 1; uint16_t marginLeft : 1; @@ -60,14 +58,13 @@ struct CssPropertyFlags { uint16_t paddingBottom : 1; uint16_t paddingLeft : 1; uint16_t paddingRight : 1; - uint16_t reserved : 3; CssPropertyFlags() - : alignment(0), + : textAlign(0), fontStyle(0), fontWeight(0), - decoration(0), - indent(0), + textDecoration(0), + textIndent(0), marginTop(0), marginBottom(0), marginLeft(0), @@ -75,16 +72,15 @@ struct CssPropertyFlags { paddingTop(0), paddingBottom(0), paddingLeft(0), - paddingRight(0), - reserved(0) {} + paddingRight(0) {} [[nodiscard]] bool anySet() const { - return alignment || fontStyle || fontWeight || decoration || indent || marginTop || marginBottom || marginLeft || - 
marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight; + return textAlign || fontStyle || fontWeight || textDecoration || textIndent || marginTop || marginBottom || + marginLeft || marginRight || paddingTop || paddingBottom || paddingLeft || paddingRight; } void clearAll() { - alignment = fontStyle = fontWeight = decoration = indent = 0; + textAlign = fontStyle = fontWeight = textDecoration = textIndent = 0; marginTop = marginBottom = marginLeft = marginRight = 0; paddingTop = paddingBottom = paddingLeft = paddingRight = 0; } @@ -94,12 +90,12 @@ struct CssPropertyFlags { // Only stores properties relevant to e-ink text rendering // Length values are stored as CssLength (value + unit) for deferred resolution struct CssStyle { - TextAlign alignment = TextAlign::None; + CssTextAlign textAlign = CssTextAlign::Left; CssFontStyle fontStyle = CssFontStyle::Normal; CssFontWeight fontWeight = CssFontWeight::Normal; - CssTextDecoration decoration = CssTextDecoration::None; + CssTextDecoration textDecoration = CssTextDecoration::None; - CssLength indent; // First-line indent (deferred resolution) + CssLength textIndent; // First-line indent (deferred resolution) CssLength marginTop; // Vertical spacing before block CssLength marginBottom; // Vertical spacing after block CssLength marginLeft; // Horizontal spacing left of block @@ -114,66 +110,65 @@ struct CssStyle { // Apply properties from another style, only overwriting if the other style // has that property explicitly defined void applyOver(const CssStyle& base) { - if (base.defined.alignment) { - alignment = base.alignment; - defined.alignment = 1; + if (base.hasTextAlign()) { + textAlign = base.textAlign; + defined.textAlign = 1; } - if (base.defined.fontStyle) { + if (base.hasFontStyle()) { fontStyle = base.fontStyle; defined.fontStyle = 1; } - if (base.defined.fontWeight) { + if (base.hasFontWeight()) { fontWeight = base.fontWeight; defined.fontWeight = 1; } - if (base.defined.decoration) { - 
decoration = base.decoration; - defined.decoration = 1; + if (base.hasTextDecoration()) { + textDecoration = base.textDecoration; + defined.textDecoration = 1; } - if (base.defined.indent) { - indent = base.indent; - defined.indent = 1; + if (base.hasTextIndent()) { + textIndent = base.textIndent; + defined.textIndent = 1; } - if (base.defined.marginTop) { + if (base.hasMarginTop()) { marginTop = base.marginTop; defined.marginTop = 1; } - if (base.defined.marginBottom) { + if (base.hasMarginBottom()) { marginBottom = base.marginBottom; defined.marginBottom = 1; } - if (base.defined.marginLeft) { + if (base.hasMarginLeft()) { marginLeft = base.marginLeft; defined.marginLeft = 1; } - if (base.defined.marginRight) { + if (base.hasMarginRight()) { marginRight = base.marginRight; defined.marginRight = 1; } - if (base.defined.paddingTop) { + if (base.hasPaddingTop()) { paddingTop = base.paddingTop; defined.paddingTop = 1; } - if (base.defined.paddingBottom) { + if (base.hasPaddingBottom()) { paddingBottom = base.paddingBottom; defined.paddingBottom = 1; } - if (base.defined.paddingLeft) { + if (base.hasPaddingLeft()) { paddingLeft = base.paddingLeft; defined.paddingLeft = 1; } - if (base.defined.paddingRight) { + if (base.hasPaddingRight()) { paddingRight = base.paddingRight; defined.paddingRight = 1; } } - // Compatibility accessors for existing code that uses hasX pattern - [[nodiscard]] bool hasTextAlign() const { return defined.alignment; } + [[nodiscard]] bool hasTextAlign() const { return defined.textAlign; } [[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; } [[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; } - [[nodiscard]] bool hasTextDecoration() const { return defined.decoration; } - [[nodiscard]] bool hasTextIndent() const { return defined.indent; } + [[nodiscard]] bool hasTextDecoration() const { return defined.textDecoration; } + [[nodiscard]] bool hasTextIndent() const { return defined.textIndent; } [[nodiscard]] bool 
hasMarginTop() const { return defined.marginTop; } [[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; } [[nodiscard]] bool hasMarginLeft() const { return defined.marginLeft; } @@ -183,15 +178,12 @@ struct CssStyle { [[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; } [[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; } - // Merge another style (alias for applyOver for compatibility) - void merge(const CssStyle& other) { applyOver(other); } - void reset() { - alignment = TextAlign::None; + textAlign = CssTextAlign::Left; fontStyle = CssFontStyle::Normal; fontWeight = CssFontWeight::Normal; - decoration = CssTextDecoration::None; - indent = CssLength{}; + textDecoration = CssTextDecoration::None; + textIndent = CssLength{}; marginTop = marginBottom = marginLeft = marginRight = CssLength{}; paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{}; defined.clearAll(); diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index c6227557..68737279 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -64,8 +64,8 @@ BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle, const float emSize) blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); // Text indent - blockStyle.textIndent = cssStyle.indent.toPixelsInt16(emSize); - blockStyle.textIndentDefined = cssStyle.defined.indent; + blockStyle.textIndent = cssStyle.textIndent.toPixelsInt16(emSize); + blockStyle.textIndentDefined = cssStyle.defined.textIndent; return blockStyle; } @@ -75,7 +75,7 @@ void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; effectiveItalic = currentBlockStyle.hasFontStyle() && 
currentBlockStyle.fontStyle == CssFontStyle::Italic; effectiveUnderline = - currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline; + currentBlockStyle.hasTextDecoration() && currentBlockStyle.textDecoration == CssTextDecoration::Underline; // Apply inline style stack in order for (const auto& entry : inlineStyleStack) { @@ -255,7 +255,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* // Merge inline style (highest priority) if (!styleAttr.empty()) { CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr); - cssStyle.merge(inlineStyle); + cssStyle.applyOver(inlineStyle); } } @@ -263,17 +263,17 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* // Headers: center aligned, bold, apply CSS overrides TextBlock::Style alignment = TextBlock::CENTER_ALIGN; if (cssStyle.hasTextAlign()) { - switch (cssStyle.alignment) { - case TextAlign::Left: + switch (cssStyle.textAlign) { + case CssTextAlign::Left: alignment = TextBlock::LEFT_ALIGN; break; - case TextAlign::Right: + case CssTextAlign::Right: alignment = TextBlock::RIGHT_ALIGN; break; - case TextAlign::Center: + case CssTextAlign::Center: alignment = TextBlock::CENTER_ALIGN; break; - case TextAlign::Justify: + case CssTextAlign::Justify: alignment = TextBlock::JUSTIFIED; break; default: @@ -296,17 +296,17 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* // Determine alignment from CSS or default auto alignment = static_cast(self->paragraphAlignment); if (cssStyle.hasTextAlign()) { - switch (cssStyle.alignment) { - case TextAlign::Left: + switch (cssStyle.textAlign) { + case CssTextAlign::Left: alignment = TextBlock::LEFT_ALIGN; break; - case TextAlign::Right: + case CssTextAlign::Right: alignment = TextBlock::RIGHT_ALIGN; break; - case TextAlign::Center: + case CssTextAlign::Center: alignment = TextBlock::CENTER_ALIGN; break; - case TextAlign::Justify: + case 
CssTextAlign::Justify: alignment = TextBlock::JUSTIFIED; break; default: @@ -352,7 +352,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } if (cssStyle.hasTextDecoration()) { entry.hasUnderline = true; - entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; } self->inlineStyleStack.push_back(entry); self->updateEffectiveInlineStyle(); @@ -369,7 +369,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } if (cssStyle.hasTextDecoration()) { entry.hasUnderline = true; - entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; } self->inlineStyleStack.push_back(entry); self->updateEffectiveInlineStyle(); @@ -388,7 +388,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } if (cssStyle.hasTextDecoration()) { entry.hasUnderline = true; - entry.underline = cssStyle.decoration == CssTextDecoration::Underline; + entry.underline = cssStyle.textDecoration == CssTextDecoration::Underline; } self->inlineStyleStack.push_back(entry); self->updateEffectiveInlineStyle(); From f0f66182466301bb10be93beb9a2c62c752d779b Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:42:20 -0500 Subject: [PATCH 15/20] feat: add BlockStyle conversion and combination methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add alignment and textAlignDefined fields to BlockStyle - Add getCombinedBlockStyle(child) for merging parent/child styles - Add static fromCssStyle(cssStyle, emSize, paragraphAlignment) factory These methods centralize the CSS→BlockStyle conversion logic (previously duplicated in createBlockStyleFromCss) and provide a clean API for handling nested block element style inheritance. 
--- lib/Epub/Epub/blocks/BlockStyle.h | 89 ++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 13 deletions(-) diff --git a/lib/Epub/Epub/blocks/BlockStyle.h b/lib/Epub/Epub/blocks/BlockStyle.h index fc40b6e1..5c26a21d 100644 --- a/lib/Epub/Epub/blocks/BlockStyle.h +++ b/lib/Epub/Epub/blocks/BlockStyle.h @@ -2,26 +2,89 @@ #include +#include "Epub/css/CssStyle.h" + /** - * BlockStyle - Block-level CSS properties for paragraphs - * - * Used to track margin/padding spacing and text indentation for block elements. - * Padding is treated similarly to margins for rendering purposes. + * BlockStyle - Block-level styling properties */ struct BlockStyle { - int16_t marginTop = 0; // pixels - int16_t marginBottom = 0; // pixels - int16_t marginLeft = 0; // pixels - int16_t marginRight = 0; // pixels - int16_t paddingTop = 0; // pixels (treated same as margin) - int16_t paddingBottom = 0; // pixels (treated same as margin) - int16_t paddingLeft = 0; // pixels (treated same as margin) - int16_t paddingRight = 0; // pixels (treated same as margin) - int16_t textIndent = 0; // pixels + CssTextAlign alignment = CssTextAlign::Justify; + + // Spacing (in pixels) + int16_t marginTop = 0; + int16_t marginBottom = 0; + int16_t marginLeft = 0; + int16_t marginRight = 0; + int16_t paddingTop = 0; // treated same as margin for rendering + int16_t paddingBottom = 0; // treated same as margin for rendering + int16_t paddingLeft = 0; // treated same as margin for rendering + int16_t paddingRight = 0; // treated same as margin for rendering + int16_t textIndent = 0; bool textIndentDefined = false; // true if text-indent was explicitly set in CSS + bool textAlignDefined = false; // true if text-align was explicitly set in CSS // Combined horizontal insets (margin + padding) [[nodiscard]] int16_t leftInset() const { return marginLeft + paddingLeft; } [[nodiscard]] int16_t rightInset() const { return marginRight + paddingRight; } [[nodiscard]] int16_t totalHorizontalInset() const 
{ return leftInset() + rightInset(); } + + // Combine with another block style. Useful for parent -> child styles, where the child style should be + // applied on top of the parent's style to get the combined style. + BlockStyle getCombinedBlockStyle(const BlockStyle& child) const { + BlockStyle combinedBlockStyle; + + combinedBlockStyle.marginTop = static_cast<int16_t>(child.marginTop + marginTop); + combinedBlockStyle.marginBottom = static_cast<int16_t>(child.marginBottom + marginBottom); + combinedBlockStyle.marginLeft = static_cast<int16_t>(child.marginLeft + marginLeft); + combinedBlockStyle.marginRight = static_cast<int16_t>(child.marginRight + marginRight); + + combinedBlockStyle.paddingTop = static_cast<int16_t>(child.paddingTop + paddingTop); + combinedBlockStyle.paddingBottom = static_cast<int16_t>(child.paddingBottom + paddingBottom); + combinedBlockStyle.paddingLeft = static_cast<int16_t>(child.paddingLeft + paddingLeft); + combinedBlockStyle.paddingRight = static_cast<int16_t>(child.paddingRight + paddingRight); + // Text indent: use child's if defined + if (child.textIndentDefined) { + combinedBlockStyle.textIndent = child.textIndent; + combinedBlockStyle.textIndentDefined = true; + } else { + combinedBlockStyle.textIndent = textIndent; + combinedBlockStyle.textIndentDefined = textIndentDefined; + } + // Text align: use child's if defined + if (child.textAlignDefined) { + combinedBlockStyle.alignment = child.alignment; + combinedBlockStyle.textAlignDefined = true; + } else { + combinedBlockStyle.alignment = alignment; + combinedBlockStyle.textAlignDefined = textAlignDefined; + } + return combinedBlockStyle; + } + + // Create a BlockStyle from CSS style properties, resolving CssLength values to pixels + // emSize is the current font line height, used for em/rem unit conversion + // paragraphAlignment is the user's paragraphAlignment setting preference + static BlockStyle fromCssStyle(const CssStyle& cssStyle, const float emSize, const CssTextAlign paragraphAlignment) { + BlockStyle blockStyle; + // Resolve all CssLength
values to pixels using the current font's em size + blockStyle.marginTop = cssStyle.marginTop.toPixelsInt16(emSize); + blockStyle.marginBottom = cssStyle.marginBottom.toPixelsInt16(emSize); + blockStyle.marginLeft = cssStyle.marginLeft.toPixelsInt16(emSize); + blockStyle.marginRight = cssStyle.marginRight.toPixelsInt16(emSize); + + blockStyle.paddingTop = cssStyle.paddingTop.toPixelsInt16(emSize); + blockStyle.paddingBottom = cssStyle.paddingBottom.toPixelsInt16(emSize); + blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); + blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); + + blockStyle.textIndent = cssStyle.textIndent.toPixelsInt16(emSize); + blockStyle.textIndentDefined = cssStyle.hasTextIndent(); + blockStyle.textAlignDefined = cssStyle.hasTextAlign(); + if (blockStyle.textAlignDefined) { + blockStyle.alignment = cssStyle.textAlign; + } else { + blockStyle.alignment = paragraphAlignment; + } + return blockStyle; + } }; From d564173949e44e687828b0d1fc2a858b5265e5cd Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:42:45 -0500 Subject: [PATCH 16/20] refactor: merge TextBlock::Style into BlockStyle; use bitflag underlines Major consolidation of styling infrastructure: - Remove TextBlock::Style enum (JUSTIFIED, LEFT_ALIGN, etc.) Alignment is now stored in BlockStyle.alignment using CssTextAlign - Remove wordUnderlines list from TextBlock and ParsedText Underline state is now encoded in EpdFontFamily::Style via UNDERLINE bitflag - Use BlockStyle::fromCssStyle() and getCombinedBlockStyle() in parser Removes duplicated createBlockStyleFromCss() and mergeBlockStyles() - Simplify text block rendering to check style bitflag for underlines - Revert spurious spaces handling (isAttachingPunctuation logic) The actualGapCount approach had issues; using standard word gaps This reduces code duplication and simplifies the style inheritance model. 
--- lib/Epub/Epub/ParsedText.cpp | 113 ++++------- lib/Epub/Epub/ParsedText.h | 15 +- lib/Epub/Epub/blocks/TextBlock.cpp | 66 ++---- lib/Epub/Epub/blocks/TextBlock.h | 24 +-- .../Epub/parsers/ChapterHtmlSlimParser.cpp | 189 +++++------------- lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h | 5 +- 6 files changed, 113 insertions(+), 299 deletions(-) diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp index 63648f04..aca85581 100644 --- a/lib/Epub/Epub/ParsedText.cpp +++ b/lib/Epub/Epub/ParsedText.cpp @@ -19,23 +19,6 @@ namespace { constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD"; constexpr size_t SOFT_HYPHEN_BYTES = 2; -// Check if a character is punctuation that should attach to the previous word -// (no space before it). Includes sentence punctuation and closing quotes. -// Excludes brackets/parens to avoid false positives with decorative patterns like "[ 1 ]". -bool isAttachingPunctuation(const char c) { - return c == '.' || c == ',' || c == '!' || c == '?' || c == ';' || c == ':' || c == '"' || c == '\''; -} - -// Check if a word consists entirely of punctuation that should attach to the previous word -bool isAttachingPunctuationWord(const std::string& word) { - if (word.empty()) return false; - // Check if word starts with attaching punctuation and is short (to avoid false positives) - if (isAttachingPunctuation(word[0]) && word.size() <= 3) { - return true; - } - return false; -} - bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; } // Removes every soft hyphen in-place so rendered glyphs match measured widths. 
@@ -66,12 +49,15 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s } // namespace -void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) { +void ParsedText::addWord(std::string word, const EpdFontFamily::Style style, const bool underline) { if (word.empty()) return; words.push_back(std::move(word)); - wordStyles.push_back(fontStyle); - wordUnderlines.push_back(underline); + EpdFontFamily::Style combinedStyle = style; + if (underline) { + combinedStyle = static_cast(combinedStyle | EpdFontFamily::UNDERLINE); + } + wordStyles.push_back(combinedStyle); } // Consumes data to minimize memory usage @@ -112,8 +98,7 @@ std::vector ParsedText::calculateWordWidths(const GfxRenderer& rendere auto wordStylesIt = wordStyles.begin(); while (wordsIt != words.end()) { - uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt); - wordWidths.push_back(width); + wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt)); std::advance(wordsIt, 1); std::advance(wordStylesIt, 1); @@ -129,10 +114,11 @@ std::vector ParsedText::computeLineBreaks(const GfxRenderer& renderer, c } // Calculate first line indent (only for left/justified text without extra paragraph spacing) - const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing && - (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) - ? blockStyle.textIndent - : 0; + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; // Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation. 
for (size_t i = 0; i < wordWidths.size(); ++i) { @@ -233,7 +219,7 @@ void ParsedText::applyParagraphIndent() { if (blockStyle.textIndentDefined) { // CSS text-indent is explicitly set (even if 0) - don't use fallback EmSpace // The actual indent positioning is handled in extractLine() - } else if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) { + } else if (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) { // No CSS text-indent defined - use EmSpace fallback for visual indent words.front().insert(0, "\xe2\x80\x83"); } @@ -244,10 +230,11 @@ std::vector ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r const int pageWidth, const int spaceWidth, std::vector& wordWidths) { // Calculate first line indent (only for left/justified text without extra paragraph spacing) - const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing && - (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) - ? blockStyle.textIndent - : 0; + const int firstLineIndent = + blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? blockStyle.textIndent + : 0; std::vector lineBreakIndices; size_t currentIndex = 0; @@ -381,25 +368,16 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const // Calculate first line indent (only for left/justified text without extra paragraph spacing) const bool isFirstLine = breakIndex == 0; - const int firstLineIndent = isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && - (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) - ? blockStyle.textIndent - : 0; + const int firstLineIndent = + isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing && + (blockStyle.alignment == CssTextAlign::Justify || blockStyle.alignment == CssTextAlign::Left) + ? 
blockStyle.textIndent + : 0; - // Calculate total word width for this line and count actual word gaps - // (punctuation that attaches to previous word doesn't count as a gap) - // Note: words list starts at the beginning because previous lines were spliced out + // Calculate total word width for this line int lineWordWidthSum = 0; - size_t actualGapCount = 0; - auto countWordIt = words.begin(); - - for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { - lineWordWidthSum += wordWidths[lastBreakAt + wordIdx]; - // Count gaps: each word after the first creates a gap, unless it's attaching punctuation - if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) { - actualGapCount++; - } - ++countWordIt; + for (size_t i = lastBreakAt; i < lineBreak; i++) { + lineWordWidthSum += wordWidths[i]; } // Calculate spacing (account for indent reducing effective page width on first line) @@ -409,54 +387,37 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spacing = spaceWidth; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; - // For justified text, calculate spacing based on actual gap count - if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) { - spacing = spareSpace / static_cast(actualGapCount); + if (blockStyle.alignment == CssTextAlign::Justify && !isLastLine && lineWordCount >= 2) { + spacing = spareSpace / (lineWordCount - 1); } // Calculate initial x position (first line starts at indent for left/justified text) auto xpos = static_cast(firstLineIndent); - if (style == TextBlock::RIGHT_ALIGN) { - xpos = spareSpace - static_cast(actualGapCount) * spaceWidth; - } else if (style == TextBlock::CENTER_ALIGN) { - xpos = (spareSpace - static_cast(actualGapCount) * spaceWidth) / 2; + if (blockStyle.alignment == CssTextAlign::Right) { + xpos = spareSpace - (lineWordCount - 1) * spaceWidth; + } else if (blockStyle.alignment == CssTextAlign::Center) { + xpos = (spareSpace - (lineWordCount - 1) 
* spaceWidth) / 2; } // Pre-calculate X positions for words - // Punctuation that attaches to the previous word doesn't get space before it - // Note: words list starts at the beginning because previous lines were spliced out std::list lineXPos; - auto wordIt = words.begin(); - - for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) { - const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx]; - + for (size_t i = lastBreakAt; i < lineBreak; i++) { + const uint16_t currentWordWidth = wordWidths[i]; lineXPos.push_back(xpos); - - // Add spacing after this word, unless the next word is attaching punctuation - auto nextWordIt = wordIt; - ++nextWordIt; - const bool nextIsAttachingPunctuation = wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt); - - xpos += currentWordWidth + (nextIsAttachingPunctuation ? 0 : spacing); - ++wordIt; + xpos += currentWordWidth + spacing; } // Iterators always start at the beginning as we are moving content with splice below auto wordEndIt = words.begin(); auto wordStyleEndIt = wordStyles.begin(); - auto wordUnderlineEndIt = wordUnderlines.begin(); std::advance(wordEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount); - std::advance(wordUnderlineEndIt, lineWordCount); // *** CRITICAL STEP: CONSUME DATA USING SPLICE *** std::list lineWords; lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); std::list lineWordStyles; lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); - std::list lineWordUnderlines; - lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt); for (auto& word : lineWords) { if (containsSoftHyphen(word)) { @@ -464,6 +425,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const } } - processLine(std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style, - blockStyle, std::move(lineWordUnderlines))); + 
processLine( + std::make_shared(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), blockStyle)); } diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h index cc2596c5..a13d13b5 100644 --- a/lib/Epub/Epub/ParsedText.h +++ b/lib/Epub/Epub/ParsedText.h @@ -16,8 +16,6 @@ class GfxRenderer; class ParsedText { std::list words; std::list wordStyles; - std::list wordUnderlines; // Track underline per word - TextBlock::Style style; BlockStyle blockStyle; bool extraParagraphSpacing; bool hyphenationEnabled; @@ -35,19 +33,14 @@ class ParsedText { std::vector calculateWordWidths(const GfxRenderer& renderer, int fontId); public: - explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, - const bool hyphenationEnabled = false, const BlockStyle& blockStyle = BlockStyle()) - : style(style), - blockStyle(blockStyle), - extraParagraphSpacing(extraParagraphSpacing), - hyphenationEnabled(hyphenationEnabled) {} + explicit ParsedText(const bool extraParagraphSpacing, const bool hyphenationEnabled = false, + const BlockStyle& blockStyle = BlockStyle()) + : blockStyle(blockStyle), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} ~ParsedText() = default; void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false); - void setStyle(const TextBlock::Style style) { this->style = style; } void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } - TextBlock::Style getStyle() const { return style; } - const BlockStyle& getBlockStyle() const { return blockStyle; } + BlockStyle& getBlockStyle() { return blockStyle; } size_t size() const { return words.size(); } bool isEmpty() const { return words.empty(); } void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 724471b6..3ab25558 100644 --- 
a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -14,15 +14,14 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int auto wordIt = words.begin(); auto wordStylesIt = wordStyles.begin(); auto wordXposIt = wordXpos.begin(); - auto wordUnderlineIt = wordUnderlines.begin(); for (size_t i = 0; i < words.size(); i++) { const int wordX = *wordXposIt + x; - renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt); + const EpdFontFamily::Style currentStyle = *wordStylesIt; + renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, currentStyle); - // Draw underline if word is underlined - if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) { + if ((currentStyle & EpdFontFamily::UNDERLINE) != 0) { const std::string& w = *wordIt; - const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt); + const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), currentStyle); // y is the top of the text line; add ascender to reach baseline, then offset 2px below const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2; @@ -34,7 +33,7 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int static_cast(w[2]) == 0x83) { const char* visiblePtr = w.c_str() + 3; const int prefixWidth = renderer.getTextAdvanceX(fontId, std::string("\xe2\x80\x83").c_str()); - const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt); + const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, currentStyle); startX = wordX + prefixWidth; underlineWidth = visibleWidth; } @@ -45,9 +44,6 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int std::advance(wordIt, 1); std::advance(wordStylesIt, 1); std::advance(wordXposIt, 1); - if (wordUnderlineIt != wordUnderlines.end()) { - std::advance(wordUnderlineIt, 1); - } } } @@ -64,29 +60,9 @@ bool TextBlock::serialize(FsFile& file) const { for (auto x : 
wordXpos) serialization::writePod(file, x); for (auto s : wordStyles) serialization::writePod(file, s); - // Underline flags (packed as bytes, 8 words per byte) - uint8_t underlineByte = 0; - int bitIndex = 0; - auto underlineIt = wordUnderlines.begin(); - for (size_t i = 0; i < words.size(); i++) { - if (underlineIt != wordUnderlines.end() && *underlineIt) { - underlineByte |= 1 << bitIndex; - } - bitIndex++; - if (bitIndex == 8 || i == words.size() - 1) { - serialization::writePod(file, underlineByte); - underlineByte = 0; - bitIndex = 0; - } - if (underlineIt != wordUnderlines.end()) { - ++underlineIt; - } - } - - // Block style (alignment) - serialization::writePod(file, style); - - // Block style (margins/padding/indent) + // Style (alignment + margins/padding/indent) + serialization::writePod(file, blockStyle.alignment); + serialization::writePod(file, blockStyle.textAlignDefined); serialization::writePod(file, blockStyle.marginTop); serialization::writePod(file, blockStyle.marginBottom); serialization::writePod(file, blockStyle.marginLeft); @@ -106,8 +82,6 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { std::list words; std::list wordXpos; std::list wordStyles; - std::list wordUnderlines; - Style style; BlockStyle blockStyle; // Word count @@ -127,23 +101,9 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& s : wordStyles) serialization::readPod(file, s); - // Underline flags (packed as bytes, 8 words per byte) - wordUnderlines.resize(wc, false); - auto underlineIt = wordUnderlines.begin(); - const int bytesNeeded = (wc + 7) / 8; - for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) { - uint8_t underlineByte; - serialization::readPod(file, underlineByte); - for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) { - *underlineIt = (underlineByte & 1 << bit) != 0; - ++underlineIt; - } - } - - // Block style (alignment) - serialization::readPod(file, 
style); - - // Block style (margins/padding/indent) + // Style (alignment + margins/padding/indent) + serialization::readPod(file, blockStyle.alignment); + serialization::readPod(file, blockStyle.textAlignDefined); serialization::readPod(file, blockStyle.marginTop); serialization::readPod(file, blockStyle.marginBottom); serialization::readPod(file, blockStyle.marginLeft); @@ -155,6 +115,6 @@ std::unique_ptr TextBlock::deserialize(FsFile& file) { serialization::readPod(file, blockStyle.textIndent); serialization::readPod(file, blockStyle.textIndentDefined); - return std::unique_ptr(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style, - blockStyle, std::move(wordUnderlines))); + return std::unique_ptr( + new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), blockStyle)); } diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index e7993fe8..e233f77f 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -11,41 +11,21 @@ // Represents a line of text on a page class TextBlock final : public Block { - public: - enum Style : uint8_t { - JUSTIFIED = 0, - LEFT_ALIGN = 1, - CENTER_ALIGN = 2, - RIGHT_ALIGN = 3, - }; - private: std::list words; std::list wordXpos; std::list wordStyles; - std::list wordUnderlines; // Track underline per word - Style style; BlockStyle blockStyle; public: explicit TextBlock(std::list words, std::list word_xpos, - std::list word_styles, const Style style, - const BlockStyle& blockStyle = BlockStyle(), std::list word_underlines = std::list()) + std::list word_styles, const BlockStyle& blockStyle = BlockStyle()) : words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), - wordUnderlines(std::move(word_underlines)), - style(style), - blockStyle(blockStyle) { - // Ensure underlines list matches words list size - while (this->wordUnderlines.size() < this->words.size()) { - 
this->wordUnderlines.push_back(false); - } - } + blockStyle(blockStyle) {} ~TextBlock() override = default; - void setStyle(const Style style) { this->style = style; } void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; } - Style getStyle() const { return style; } const BlockStyle& getBlockStyle() const { return blockStyle; } bool isEmpty() override { return words.empty(); } void layout(GfxRenderer& renderer) override {}; diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp index 68737279..ab93d9cb 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp @@ -43,39 +43,17 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib return false; } -// Create a BlockStyle from CSS style properties, resolving CssLength values to pixels -// emSize is the current font line height, used for em/rem unit conversion -BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle, const float emSize) { - BlockStyle blockStyle; - // Resolve all CssLength values to pixels using the current font's em size - const int16_t marginTopPx = cssStyle.marginTop.toPixelsInt16(emSize); - const int16_t marginBottomPx = cssStyle.marginBottom.toPixelsInt16(emSize); - const int16_t paddingTopPx = cssStyle.paddingTop.toPixelsInt16(emSize); - const int16_t paddingBottomPx = cssStyle.paddingBottom.toPixelsInt16(emSize); - - // Vertical: combine margin and padding for top/bottom spacing - blockStyle.marginTop = static_cast(marginTopPx + paddingTopPx); - blockStyle.marginBottom = static_cast(marginBottomPx + paddingBottomPx); - blockStyle.paddingTop = paddingTopPx; - blockStyle.paddingBottom = paddingBottomPx; - // Horizontal: store margin and padding separately for layout calculations - blockStyle.marginLeft = cssStyle.marginLeft.toPixelsInt16(emSize); - blockStyle.marginRight = cssStyle.marginRight.toPixelsInt16(emSize); - 
blockStyle.paddingLeft = cssStyle.paddingLeft.toPixelsInt16(emSize); - blockStyle.paddingRight = cssStyle.paddingRight.toPixelsInt16(emSize); - // Text indent - blockStyle.textIndent = cssStyle.textIndent.toPixelsInt16(emSize); - blockStyle.textIndentDefined = cssStyle.defined.textIndent; - return blockStyle; +bool isHeaderOrBlock(const char* name) { + return matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); } // Update effective bold/italic/underline based on block style and inline style stack void ChapterHtmlSlimParser::updateEffectiveInlineStyle() { // Start with block-level styles - effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold; - effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic; + effectiveBold = currentCssStyle.hasFontWeight() && currentCssStyle.fontWeight == CssFontWeight::Bold; + effectiveItalic = currentCssStyle.hasFontStyle() && currentCssStyle.fontStyle == CssFontStyle::Italic; effectiveUnderline = - currentBlockStyle.hasTextDecoration() && currentBlockStyle.textDecoration == CssTextDecoration::Underline; + currentCssStyle.hasTextDecoration() && currentCssStyle.textDecoration == CssTextDecoration::Underline; // Apply inline style stack in order for (const auto& entry : inlineStyleStack) { @@ -98,69 +76,41 @@ void ChapterHtmlSlimParser::flushPartWordBuffer() { const bool isItalic = italicUntilDepth < depth || effectiveItalic; const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline; + // Combine style flags using bitwise OR EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; - if (isBold && isItalic) { - fontStyle = EpdFontFamily::BOLD_ITALIC; - } else if (isBold) { - fontStyle = EpdFontFamily::BOLD; - } else if (isItalic) { - fontStyle = EpdFontFamily::ITALIC; + if (isBold) { + fontStyle = static_cast(fontStyle | EpdFontFamily::BOLD); + } + if (isItalic) { + fontStyle = 
static_cast(fontStyle | EpdFontFamily::ITALIC); + } + if (isUnderline) { + fontStyle = static_cast(fontStyle | EpdFontFamily::UNDERLINE); } // flush the buffer partWordBuffer[partWordBufferIndex] = '\0'; - currentTextBlock->addWord(partWordBuffer, fontStyle, isUnderline); + currentTextBlock->addWord(partWordBuffer, fontStyle); partWordBufferIndex = 0; } -// Merge block styles for nested block elements -// When a child block element is inside a parent with no direct text content, -// we accumulate their margins so nested containers properly contribute spacing -BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) { - BlockStyle merged; - // Vertical margins: sum them (nested blocks create additive spacing) - merged.marginTop = static_cast(parent.marginTop + child.marginTop); - merged.marginBottom = static_cast(parent.marginBottom + child.marginBottom); - // Horizontal margins: sum them (nested blocks create cumulative indentation) - merged.marginLeft = static_cast(parent.marginLeft + child.marginLeft); - merged.marginRight = static_cast(parent.marginRight + child.marginRight); - // Padding: sum them - merged.paddingTop = static_cast(parent.paddingTop + child.paddingTop); - merged.paddingBottom = static_cast(parent.paddingBottom + child.paddingBottom); - merged.paddingLeft = static_cast(parent.paddingLeft + child.paddingLeft); - merged.paddingRight = static_cast(parent.paddingRight + child.paddingRight); - // Text indent: use child's if defined, otherwise inherit parent's - if (child.textIndentDefined) { - merged.textIndent = child.textIndent; - merged.textIndentDefined = true; - } else if (parent.textIndentDefined) { - merged.textIndent = parent.textIndent; - merged.textIndentDefined = true; - } - return merged; -} - // start a new text block if needed -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) { +void ChapterHtmlSlimParser::startNewTextBlock(const BlockStyle& blockStyle) { if 
(currentTextBlock) { // already have a text block running and it is empty - just reuse it if (currentTextBlock->isEmpty()) { - currentTextBlock->setStyle(style); - // Merge with existing block style to accumulate margins from parent block elements - // This handles cases like

<div style="margin-bottom:2em"><h1>text</h1></div>

where the - // div's margin should be preserved even though it has no direct text content - const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle); - currentTextBlock->setBlockStyle(merged); + // Merge with existing block style to accumulate CSS styling from parent block elements. + // This handles cases like

<div style="margin-bottom:2em"><h1>text</h1></div>

where the + // div's margin should be preserved, even though it has no direct text content. + currentTextBlock->setBlockStyle(currentTextBlock->getBlockStyle().getCombinedBlockStyle(blockStyle)); return; } makePages(); } - currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled, blockStyle)); + currentTextBlock.reset(new ParsedText(extraParagraphSpacing, hyphenationEnabled, blockStyle)); } -void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); } - void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { auto* self = static_cast(userData); @@ -183,13 +133,17 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } + auto centeredBlockStyle = BlockStyle(); + centeredBlockStyle.textAlignDefined = true; + centeredBlockStyle.alignment = CssTextAlign::Center; + // Special handling for tables - show placeholder text instead of dropping silently if (strcmp(name, "table") == 0) { // Add placeholder text - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, "[Table omitted]", strlen("[Table omitted]")); @@ -214,9 +168,9 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* Serial.printf("[%lu] [EHP] Image alt: %s\n", millis(), alt.c_str()); - self->startNewTextBlock(TextBlock::CENTER_ALIGN); + self->startNewTextBlock(centeredBlockStyle); self->italicUntilDepth = min(self->italicUntilDepth, self->depth); - // Advance depth before processing character data (like you would for a element with text) + // Advance depth before 
processing character data (like you would for an element with text) self->depth += 1; self->characterData(userData, alt.c_str(), alt.length()); @@ -244,9 +198,6 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } - // Determine if this is a block element - bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS); - // Compute CSS style for this element CssStyle cssStyle; if (self->cssParser) { @@ -259,30 +210,12 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } } - if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - // Headers: center aligned, bold, apply CSS overrides - TextBlock::Style alignment = TextBlock::CENTER_ALIGN; - if (cssStyle.hasTextAlign()) { - switch (cssStyle.textAlign) { - case CssTextAlign::Left: - alignment = TextBlock::LEFT_ALIGN; - break; - case CssTextAlign::Right: - alignment = TextBlock::RIGHT_ALIGN; - break; - case CssTextAlign::Center: - alignment = TextBlock::CENTER_ALIGN; - break; - case CssTextAlign::Justify: - alignment = TextBlock::JUSTIFIED; - break; - default: - break; - } - } + const float emSize = static_cast(self->renderer.getLineHeight(self->fontId)) * self->lineCompression; + const auto userAlignment = static_cast(self->paragraphAlignment); - self->currentBlockStyle = cssStyle; - self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId))); + if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); self->updateEffectiveInlineStyle(); } else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { @@ -291,31 +224,10 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* // flush word preceding
to currentTextBlock before calling startNewTextBlock self->flushPartWordBuffer(); } - self->startNewTextBlock(self->currentTextBlock->getStyle()); + self->startNewTextBlock(self->currentTextBlock->getBlockStyle()); } else { - // Determine alignment from CSS or default - auto alignment = static_cast(self->paragraphAlignment); - if (cssStyle.hasTextAlign()) { - switch (cssStyle.textAlign) { - case CssTextAlign::Left: - alignment = TextBlock::LEFT_ALIGN; - break; - case CssTextAlign::Right: - alignment = TextBlock::RIGHT_ALIGN; - break; - case CssTextAlign::Center: - alignment = TextBlock::CENTER_ALIGN; - break; - case CssTextAlign::Justify: - alignment = TextBlock::JUSTIFIED; - break; - default: - break; - } - } - - self->currentBlockStyle = cssStyle; - self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId))); + self->currentCssStyle = cssStyle; + self->startNewTextBlock(BlockStyle::fromCssStyle(cssStyle, emSize, userAlignment)); self->updateEffectiveInlineStyle(); if (strcmp(name, "li") == 0) { @@ -373,7 +285,7 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* } self->inlineStyleStack.push_back(entry); self->updateEffectiveInlineStyle(); - } else if (strcmp(name, "span") == 0 || !isBlockElement) { + } else if (strcmp(name, "span") == 0 || !isHeaderOrBlock(name)) { // Handle span and other inline elements for CSS styling if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) { StyleStackEntry entry; @@ -464,12 +376,12 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1; const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline; + const bool headerOrBlockTag = isHeaderOrBlock(name); // Flush buffer with current style BEFORE any style changes if (self->partWordBufferIndex > 0) { // Flush if 
style will change OR if we're closing a block/structural element - const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || - matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || + const bool shouldFlush = styleWillChange || headerOrBlockTag || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1; @@ -508,15 +420,18 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n self->updateEffectiveInlineStyle(); } - // Clear block style when leaving block elements - if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { - self->currentBlockStyle.reset(); + // Clear block style when leaving header or block elements + if (headerOrBlockTag) { + self->currentCssStyle.reset(); self->updateEffectiveInlineStyle(); } } bool ChapterHtmlSlimParser::parseAndBuildPages() { - startNewTextBlock((TextBlock::Style)this->paragraphAlignment); + auto paragraphAlignmentBlockStyle = BlockStyle(); + paragraphAlignmentBlockStyle.textAlignDefined = true; + paragraphAlignmentBlockStyle.alignment = static_cast(this->paragraphAlignment); + startNewTextBlock(paragraphAlignmentBlockStyle); const XML_Parser parser = XML_ParserCreate(nullptr); int done; @@ -624,11 +539,14 @@ void ChapterHtmlSlimParser::makePages() { const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; - // Apply marginTop before the paragraph (stored in pixels) + // Apply top spacing before the paragraph (stored in pixels) const BlockStyle& blockStyle = currentTextBlock->getBlockStyle(); if (blockStyle.marginTop > 0) { currentPageNextY += blockStyle.marginTop; } + if (blockStyle.paddingTop > 0) { + currentPageNextY += blockStyle.paddingTop; + } // Calculate effective width accounting for horizontal 
margins/padding const int horizontalInset = blockStyle.totalHorizontalInset(); @@ -639,10 +557,13 @@ void ChapterHtmlSlimParser::makePages() { renderer, fontId, effectiveWidth, [this](const std::shared_ptr& textBlock) { addLineToPage(textBlock); }); - // Apply marginBottom after the paragraph (stored in pixels) + // Apply bottom spacing after the paragraph (stored in pixels) if (blockStyle.marginBottom > 0) { currentPageNextY += blockStyle.marginBottom; } + if (blockStyle.paddingBottom > 0) { + currentPageNextY += blockStyle.paddingBottom; + } // Extra paragraph spacing if enabled (default behavior) if (extraParagraphSpacing) { diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h index 7d9803f5..92a9838a 100644 --- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h +++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h @@ -50,14 +50,13 @@ class ChapterHtmlSlimParser { bool hasUnderline = false, underline = false; }; std::vector inlineStyleStack; - CssStyle currentBlockStyle; + CssStyle currentCssStyle; bool effectiveBold = false; bool effectiveItalic = false; bool effectiveUnderline = false; void updateEffectiveInlineStyle(); - void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle); - void startNewTextBlock(TextBlock::Style style); + void startNewTextBlock(const BlockStyle& blockStyle); void flushPartWordBuffer(); void makePages(); // XML callbacks From a7ffc02c340b33c4c00761d094a41e08d3ad1abb Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:43:44 -0500 Subject: [PATCH 17/20] refactor: simplify CssParser margin/padding shorthand Remove intermediary variables (top, right, bottom, left) in margin and padding shorthand parsing. Directly assign to style fields and reference previously assigned values for defaulting logic. No functional change - purely code simplification. 
--- lib/Epub/Epub/css/CssParser.cpp | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index afcf7e95..2ccaafe9 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -382,14 +382,10 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { // Shorthand: 1-4 values for top, right, bottom, left const auto values = splitWhitespace(propValue); if (!values.empty()) { - const CssLength top = interpretLength(values[0]); - const CssLength right = values.size() >= 2 ? interpretLength(values[1]) : top; - const CssLength bottom = values.size() >= 3 ? interpretLength(values[2]) : top; - const CssLength left = values.size() >= 4 ? interpretLength(values[3]) : right; - style.marginTop = top; - style.marginRight = right; - style.marginBottom = bottom; - style.marginLeft = left; + style.marginTop = interpretLength(values[0]); + style.marginRight = values.size() >= 2 ? interpretLength(values[1]) : style.marginTop; + style.marginBottom = values.size() >= 3 ? interpretLength(values[2]) : style.marginTop; + style.marginLeft = values.size() >= 4 ? interpretLength(values[3]) : style.marginRight; style.defined.marginTop = style.defined.marginRight = style.defined.marginBottom = style.defined.marginLeft = 1; } } else if (propName == "padding-top") { @@ -408,14 +404,10 @@ CssStyle CssParser::parseDeclarations(const std::string& declBlock) { // Shorthand: 1-4 values for top, right, bottom, left const auto values = splitWhitespace(propValue); if (!values.empty()) { - const CssLength top = interpretLength(values[0]); - const CssLength right = values.size() >= 2 ? interpretLength(values[1]) : top; - const CssLength bottom = values.size() >= 3 ? interpretLength(values[2]) : top; - const CssLength left = values.size() >= 4 ? 
interpretLength(values[3]) : right; - style.paddingTop = top; - style.paddingRight = right; - style.paddingBottom = bottom; - style.paddingLeft = left; + style.paddingTop = interpretLength(values[0]); + style.paddingRight = values.size() >= 2 ? interpretLength(values[1]) : style.paddingTop; + style.paddingBottom = values.size() >= 3 ? interpretLength(values[2]) : style.paddingTop; + style.paddingLeft = values.size() >= 4 ? interpretLength(values[3]) : style.paddingRight; style.defined.paddingTop = style.defined.paddingRight = style.defined.paddingBottom = style.defined.paddingLeft = 1; } From 6115bf3cd22f85dec39e38e0014c0a7622e35da8 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:44:30 -0500 Subject: [PATCH 18/20] feat: add CSS rules caching to CssParser Add saveToCache() and loadFromCache() methods to CssParser for persisting parsed CSS rules to disk. The cache format includes: - Version byte for cache invalidation - Rule count - For each rule: length-prefixed selector string + CssStyle fields This allows skipping CSS file parsing on subsequent book opens by loading pre-parsed rules from cache. 
--- lib/Epub/Epub/css/CssParser.cpp | 181 ++++++++++++++++++++++++++++++++ lib/Epub/Epub/css/CssParser.h | 15 +++ 2 files changed, 196 insertions(+) diff --git a/lib/Epub/Epub/css/CssParser.cpp b/lib/Epub/Epub/css/CssParser.cpp index 2ccaafe9..d51ebba7 100644 --- a/lib/Epub/Epub/css/CssParser.cpp +++ b/lib/Epub/Epub/css/CssParser.cpp @@ -514,3 +514,184 @@ CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& // Inline style parsing (static - doesn't need rule database) CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); } + +// Cache serialization + +// Cache format version - increment when format changes +constexpr uint8_t CSS_CACHE_VERSION = 1; + +bool CssParser::saveToCache(FsFile& file) const { + if (!file) { + return false; + } + + // Write version + file.write(CSS_CACHE_VERSION); + + // Write rule count + const auto ruleCount = static_cast(rulesBySelector_.size()); + file.write(reinterpret_cast(&ruleCount), sizeof(ruleCount)); + + // Write each rule: selector string + CssStyle fields + for (const auto& pair : rulesBySelector_) { + // Write selector string (length-prefixed) + const auto selectorLen = static_cast(pair.first.size()); + file.write(reinterpret_cast(&selectorLen), sizeof(selectorLen)); + file.write(reinterpret_cast(pair.first.data()), selectorLen); + + // Write CssStyle fields (all are POD types) + const CssStyle& style = pair.second; + file.write(static_cast(style.textAlign)); + file.write(static_cast(style.fontStyle)); + file.write(static_cast(style.fontWeight)); + file.write(static_cast(style.textDecoration)); + + // Write CssLength fields (value + unit) + auto writeLength = [&file](const CssLength& len) { + file.write(reinterpret_cast(&len.value), sizeof(len.value)); + file.write(static_cast(len.unit)); + }; + + writeLength(style.textIndent); + writeLength(style.marginTop); + writeLength(style.marginBottom); + writeLength(style.marginLeft); + 
writeLength(style.marginRight); + writeLength(style.paddingTop); + writeLength(style.paddingBottom); + writeLength(style.paddingLeft); + writeLength(style.paddingRight); + + // Write defined flags as uint16_t + uint16_t definedBits = 0; + if (style.defined.textAlign) definedBits |= 1 << 0; + if (style.defined.fontStyle) definedBits |= 1 << 1; + if (style.defined.fontWeight) definedBits |= 1 << 2; + if (style.defined.textDecoration) definedBits |= 1 << 3; + if (style.defined.textIndent) definedBits |= 1 << 4; + if (style.defined.marginTop) definedBits |= 1 << 5; + if (style.defined.marginBottom) definedBits |= 1 << 6; + if (style.defined.marginLeft) definedBits |= 1 << 7; + if (style.defined.marginRight) definedBits |= 1 << 8; + if (style.defined.paddingTop) definedBits |= 1 << 9; + if (style.defined.paddingBottom) definedBits |= 1 << 10; + if (style.defined.paddingLeft) definedBits |= 1 << 11; + if (style.defined.paddingRight) definedBits |= 1 << 12; + file.write(reinterpret_cast(&definedBits), sizeof(definedBits)); + } + + Serial.printf("[%lu] [CSS] Saved %u rules to cache\n", millis(), ruleCount); + return true; +} + +bool CssParser::loadFromCache(FsFile& file) { + if (!file) { + return false; + } + + // Clear existing rules + clear(); + + // Read and verify version + uint8_t version = 0; + if (file.read(&version, 1) != 1 || version != CSS_CACHE_VERSION) { + Serial.printf("[%lu] [CSS] Cache version mismatch (got %u, expected %u)\n", millis(), version, CSS_CACHE_VERSION); + return false; + } + + // Read rule count + uint16_t ruleCount = 0; + if (file.read(&ruleCount, sizeof(ruleCount)) != sizeof(ruleCount)) { + return false; + } + + // Read each rule + for (uint16_t i = 0; i < ruleCount; ++i) { + // Read selector string + uint16_t selectorLen = 0; + if (file.read(&selectorLen, sizeof(selectorLen)) != sizeof(selectorLen)) { + rulesBySelector_.clear(); + return false; + } + + std::string selector; + selector.resize(selectorLen); + if (file.read(&selector[0], 
selectorLen) != selectorLen) { + rulesBySelector_.clear(); + return false; + } + + // Read CssStyle fields + CssStyle style; + uint8_t enumVal; + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.textAlign = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.fontStyle = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.fontWeight = static_cast(enumVal); + + if (file.read(&enumVal, 1) != 1) { + rulesBySelector_.clear(); + return false; + } + style.textDecoration = static_cast(enumVal); + + // Read CssLength fields + auto readLength = [&file](CssLength& len) -> bool { + if (file.read(&len.value, sizeof(len.value)) != sizeof(len.value)) { + return false; + } + uint8_t unitVal; + if (file.read(&unitVal, 1) != 1) { + return false; + } + len.unit = static_cast(unitVal); + return true; + }; + + if (!readLength(style.textIndent) || !readLength(style.marginTop) || !readLength(style.marginBottom) || + !readLength(style.marginLeft) || !readLength(style.marginRight) || !readLength(style.paddingTop) || + !readLength(style.paddingBottom) || !readLength(style.paddingLeft) || !readLength(style.paddingRight)) { + rulesBySelector_.clear(); + return false; + } + + // Read defined flags + uint16_t definedBits = 0; + if (file.read(&definedBits, sizeof(definedBits)) != sizeof(definedBits)) { + rulesBySelector_.clear(); + return false; + } + style.defined.textAlign = (definedBits & 1 << 0) != 0; + style.defined.fontStyle = (definedBits & 1 << 1) != 0; + style.defined.fontWeight = (definedBits & 1 << 2) != 0; + style.defined.textDecoration = (definedBits & 1 << 3) != 0; + style.defined.textIndent = (definedBits & 1 << 4) != 0; + style.defined.marginTop = (definedBits & 1 << 5) != 0; + style.defined.marginBottom = (definedBits & 1 << 6) != 0; + style.defined.marginLeft = (definedBits & 1 << 7) != 0; + 
style.defined.marginRight = (definedBits & 1 << 8) != 0; + style.defined.paddingTop = (definedBits & 1 << 9) != 0; + style.defined.paddingBottom = (definedBits & 1 << 10) != 0; + style.defined.paddingLeft = (definedBits & 1 << 11) != 0; + style.defined.paddingRight = (definedBits & 1 << 12) != 0; + + rulesBySelector_[selector] = style; + } + + Serial.printf("[%lu] [CSS] Loaded %u rules from cache\n", millis(), ruleCount); + return true; +} diff --git a/lib/Epub/Epub/css/CssParser.h b/lib/Epub/Epub/css/CssParser.h index 9915485d..0e5a1b34 100644 --- a/lib/Epub/Epub/css/CssParser.h +++ b/lib/Epub/Epub/css/CssParser.h @@ -76,6 +76,21 @@ class CssParser { */ void clear() { rulesBySelector_.clear(); } + /** + * Save parsed CSS rules to a cache file. + * @param file Open file handle to write to + * @return true if cache was written successfully + */ + bool saveToCache(FsFile& file) const; + + /** + * Load CSS rules from a cache file. + * Clears any existing rules before loading. + * @param file Open file handle to read from + * @return true if cache was loaded successfully + */ + bool loadFromCache(FsFile& file); + private: // Storage: maps normalized selector -> style properties std::unordered_map rulesBySelector_; From cd61b263e5e1319f73e6e5ebf2c59cd5cc3e80ef Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:44:58 -0500 Subject: [PATCH 19/20] feat: integrate CSS rules caching in Epub loader - Add cssFiles member to Epub class (moved from BookMetadataCache) - Add getCssRulesCache() and loadCssRulesFromCache() methods - Update parseCssFiles() to save parsed rules to cache - Try loading from css_rules.cache before parsing CSS files - Add skipLoadingCss parameter to Epub::load() for performance - Remove cssFiles from BookMetadataCache (no longer needed) - Revert BookMetadataCache version to 5 (pre-CSS-files format) When loading an EPUB: 1. Try to load cached CSS rules first 2. If cache miss, parse CSS files and save to cache 3. 
If skipLoadingCss=true, skip CSS entirely (for cover display) --- lib/Epub/Epub.cpp | 110 +++++++++++++++++----------- lib/Epub/Epub.h | 8 +- lib/Epub/Epub/BookMetadataCache.cpp | 24 +----- lib/Epub/Epub/BookMetadataCache.h | 1 - 4 files changed, 77 insertions(+), 66 deletions(-) diff --git a/lib/Epub/Epub.cpp b/lib/Epub/Epub.cpp index b135da11..5dc02358 100644 --- a/lib/Epub/Epub.cpp +++ b/lib/Epub/Epub.cpp @@ -86,8 +86,9 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) { tocNavItem = opfParser.tocNavPath; } - // Copy CSS files to metadata - bookMetadata.cssFiles = opfParser.cssFiles; + if (!opfParser.cssFiles.empty()) { + cssFiles = opfParser.cssFiles; + } Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); return true; @@ -207,66 +208,91 @@ bool Epub::parseTocNavFile() const { return true; } -bool Epub::parseCssFiles() { - if (!bookMetadataCache || !bookMetadataCache->isLoaded()) { - Serial.printf("[%lu] [EBP] Cannot parse CSS, cache not loaded\n", millis()); - return false; +std::string Epub::getCssRulesCache() const { return cachePath + "/css_rules.cache"; } + +bool Epub::loadCssRulesFromCache() const { + FsFile cssCacheFile; + if (SdMan.openFileForRead("EBP", getCssRulesCache(), cssCacheFile)) { + if (cssParser->loadFromCache(cssCacheFile)) { + cssCacheFile.close(); + Serial.printf("[%lu] [EBP] Loaded CSS rules from cache\n", millis()); + return true; + } + cssCacheFile.close(); + Serial.printf("[%lu] [EBP] CSS cache invalid, reparsing\n", millis()); } + return false; +} - // Always create CssParser - needed for inline style parsing even without CSS files - cssParser.reset(new CssParser()); - - const auto& cssFiles = bookMetadataCache->coreMetadata.cssFiles; +void Epub::parseCssFiles() const { if (cssFiles.empty()) { Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis()); - return true; } - for (const auto& cssPath : cssFiles) { - Serial.printf("[%lu] [EBP] 
Parsing CSS file: %s\n", millis(), cssPath.c_str()); + // Try to load from CSS cache first + if (!loadCssRulesFromCache()) { + // Cache miss - parse CSS files + for (const auto& cssPath : cssFiles) { + Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str()); - // Extract CSS file to temp location - const auto tmpCssPath = getCachePath() + "/.tmp.css"; - FsFile tempCssFile; - if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) { - Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis()); - continue; - } - if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) { - Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str()); + // Extract CSS file to temp location + const auto tmpCssPath = getCachePath() + "/.tmp.css"; + FsFile tempCssFile; + if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis()); + continue; + } + if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) { + Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str()); + tempCssFile.close(); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + tempCssFile.close(); + + // Parse the CSS file + if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) { + Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis()); + SdMan.remove(tmpCssPath.c_str()); + continue; + } + cssParser->loadFromStream(tempCssFile); tempCssFile.close(); SdMan.remove(tmpCssPath.c_str()); - continue; } - tempCssFile.close(); - // Parse the CSS file - if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) { - Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis()); - SdMan.remove(tmpCssPath.c_str()); - continue; + // Save to cache for next time + FsFile cssCacheFile; + if (SdMan.openFileForWrite("EBP", getCssRulesCache(), cssCacheFile)) { + cssParser->saveToCache(cssCacheFile); + 
cssCacheFile.close(); } - cssParser->loadFromStream(tempCssFile); - tempCssFile.close(); - SdMan.remove(tmpCssPath.c_str()); + + Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(), + cssFiles.size()); } - - Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(), - cssFiles.size()); - return true; } // load in the meta data for the epub file -bool Epub::load(const bool buildIfMissing) { +bool Epub::load(const bool buildIfMissing, const bool skipLoadingCss) { Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); // Initialize spine/TOC cache bookMetadataCache.reset(new BookMetadataCache(cachePath)); + // Always create CssParser - needed for inline style parsing even without CSS files + cssParser.reset(new CssParser()); // Try to load existing cache first if (bookMetadataCache->load()) { - // Parse CSS files from loaded cache - parseCssFiles(); + if (!skipLoadingCss && !loadCssRulesFromCache()) { + Serial.printf("[%lu] [EBP] Warning: CSS rules cache not found, attempting to parse CSS files\n", millis()); + // to get CSS file list + if (!parseContentOpf(bookMetadataCache->coreMetadata)) { + Serial.printf("[%lu] [EBP] Could not parse content.opf from cached bookMetadata for CSS files\n", millis()); + // continue anyway - book will work without CSS and we'll still load any inline style CSS + } + parseCssFiles(); + } Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; } @@ -363,8 +389,10 @@ bool Epub::load(const bool buildIfMissing) { return false; } - // Parse CSS files after cache reload - parseCssFiles(); + if (!skipLoadingCss) { + // Parse CSS files after cache reload + parseCssFiles(); + } Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); return true; diff --git a/lib/Epub/Epub.h b/lib/Epub/Epub.h index 4f948c77..85a601a5 100644 --- a/lib/Epub/Epub.h +++ b/lib/Epub/Epub.h @@ 
-27,12 +27,16 @@ class Epub { std::unique_ptr bookMetadataCache; // CSS parser for styling std::unique_ptr cssParser; + // CSS files + std::vector cssFiles; bool findContentOpfFile(std::string* contentOpfFile) const; bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseTocNcxFile() const; bool parseTocNavFile() const; - bool parseCssFiles(); + void parseCssFiles() const; + std::string getCssRulesCache() const; + bool loadCssRulesFromCache() const; public: explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { @@ -41,7 +45,7 @@ class Epub { } ~Epub() = default; std::string& getBasePath() { return contentBasePath; } - bool load(bool buildIfMissing = true); + bool load(bool buildIfMissing = true, bool skipLoadingCss = false); bool clearCache() const; void setupCacheDir() const; const std::string& getCachePath() const; diff --git a/lib/Epub/Epub/BookMetadataCache.cpp b/lib/Epub/Epub/BookMetadataCache.cpp index 01dc87e4..e7242138 100644 --- a/lib/Epub/Epub/BookMetadataCache.cpp +++ b/lib/Epub/Epub/BookMetadataCache.cpp @@ -9,7 +9,7 @@ #include "FsHelpers.h" namespace { -constexpr uint8_t BOOK_CACHE_VERSION = 6; +constexpr uint8_t BOOK_CACHE_VERSION = 5; constexpr char bookBinFile[] = "/book.bin"; constexpr char tmpSpineBinFile[] = "/spine.bin.tmp"; constexpr char tmpTocBinFile[] = "/toc.bin.tmp"; @@ -115,14 +115,9 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta constexpr uint32_t headerASize = sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount); - // Calculate CSS files size: count + each string (length + data) - uint32_t cssFilesSize = sizeof(uint16_t); // count - for (const auto& css : metadata.cssFiles) { - cssFilesSize += sizeof(uint32_t) + css.size(); - } const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.language.size() + metadata.coverItemHref.size() + 
metadata.textReferenceHref.size() + - sizeof(uint32_t) * 5 + cssFilesSize; + sizeof(uint32_t) * 5; const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount; const uint32_t lutOffset = headerASize + metadataSize; @@ -137,11 +132,6 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta serialization::writeString(bookFile, metadata.language); serialization::writeString(bookFile, metadata.coverItemHref); serialization::writeString(bookFile, metadata.textReferenceHref); - // CSS files - serialization::writePod(bookFile, static_cast(metadata.cssFiles.size())); - for (const auto& css : metadata.cssFiles) { - serialization::writeString(bookFile, css); - } // Loop through spine entries, writing LUT positions spineFile.seek(0); @@ -395,16 +385,6 @@ bool BookMetadataCache::load() { serialization::readString(bookFile, coreMetadata.language); serialization::readString(bookFile, coreMetadata.coverItemHref); serialization::readString(bookFile, coreMetadata.textReferenceHref); - // CSS files - uint16_t cssCount; - serialization::readPod(bookFile, cssCount); - coreMetadata.cssFiles.clear(); - coreMetadata.cssFiles.reserve(cssCount); - for (uint16_t i = 0; i < cssCount; i++) { - std::string cssPath; - serialization::readString(bookFile, cssPath); - coreMetadata.cssFiles.push_back(std::move(cssPath)); - } loaded = true; Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount); diff --git a/lib/Epub/Epub/BookMetadataCache.h b/lib/Epub/Epub/BookMetadataCache.h index b5ac9385..20ce6559 100644 --- a/lib/Epub/Epub/BookMetadataCache.h +++ b/lib/Epub/Epub/BookMetadataCache.h @@ -14,7 +14,6 @@ class BookMetadataCache { std::string language; std::string coverItemHref; std::string textReferenceHref; - std::vector cssFiles; }; struct SpineEntry { From 9e1356bb92438ee851af64f9dccf6f9f29e36b72 Mon Sep 17 00:00:00 2001 From: Jake Kenneally Date: Tue, 3 Feb 2026 19:45:12 -0500 Subject: [PATCH 
20/20] perf: skip CSS loading in Sleep and Home activities Both activities only need book metadata (title, author) and cover image. Pass skipLoadingCss=true to Epub::load() to avoid unnecessary CSS parsing and caching operations. --- src/activities/boot_sleep/SleepActivity.cpp | 3 ++- src/activities/home/HomeActivity.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/activities/boot_sleep/SleepActivity.cpp b/src/activities/boot_sleep/SleepActivity.cpp index 7ffc5851..39460435 100644 --- a/src/activities/boot_sleep/SleepActivity.cpp +++ b/src/activities/boot_sleep/SleepActivity.cpp @@ -238,7 +238,8 @@ void SleepActivity::renderCoverSleepScreen() const { } else if (StringUtils::checkFileExtension(APP_STATE.openEpubPath, ".epub")) { // Handle EPUB file Epub lastEpub(APP_STATE.openEpubPath, "/.crosspoint"); - if (!lastEpub.load()) { + // Skip loading css since we only need metadata here + if (!lastEpub.load(true, true)) { Serial.println("[SLP] Failed to load last epub"); return renderDefaultSleepScreen(); } diff --git a/src/activities/home/HomeActivity.cpp b/src/activities/home/HomeActivity.cpp index 678af7cb..a6aa443b 100644 --- a/src/activities/home/HomeActivity.cpp +++ b/src/activities/home/HomeActivity.cpp @@ -52,7 +52,8 @@ void HomeActivity::onEnter() { // If epub, try to load the metadata for title/author and cover if (StringUtils::checkFileExtension(lastBookTitle, ".epub")) { Epub epub(APP_STATE.openEpubPath, "/.crosspoint"); - epub.load(false); + // Skip loading css since we only need metadata here + epub.load(false, true); if (!epub.getTitle().empty()) { lastBookTitle = std::string(epub.getTitle()); }