This commit is contained in:
Jake Kenneally 2026-01-27 20:29:34 -06:00 committed by GitHub
commit 52c788891d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 1271 additions and 67 deletions

View File

@ -86,6 +86,9 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
tocNavItem = opfParser.tocNavPath;
}
// Copy CSS files to metadata
bookMetadata.cssFiles = opfParser.cssFiles;
Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
return true;
}
@ -204,6 +207,55 @@ bool Epub::parseTocNavFile() const {
return true;
}
bool Epub::parseCssFiles() {
if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
Serial.printf("[%lu] [EBP] Cannot parse CSS, cache not loaded\n", millis());
return false;
}
// Always create CssParser - needed for inline style parsing even without CSS files
cssParser.reset(new CssParser());
const auto& cssFiles = bookMetadataCache->coreMetadata.cssFiles;
if (cssFiles.empty()) {
Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis());
return true;
}
for (const auto& cssPath : cssFiles) {
Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str());
// Extract CSS file to temp location
const auto tmpCssPath = getCachePath() + "/.tmp.css";
FsFile tempCssFile;
if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) {
Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis());
continue;
}
if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) {
Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str());
tempCssFile.close();
SdMan.remove(tmpCssPath.c_str());
continue;
}
tempCssFile.close();
// Parse the CSS file
if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) {
Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis());
SdMan.remove(tmpCssPath.c_str());
continue;
}
cssParser->loadFromStream(tempCssFile);
tempCssFile.close();
SdMan.remove(tmpCssPath.c_str());
}
Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(),
cssFiles.size());
return true;
}
// load in the meta data for the epub file
bool Epub::load(const bool buildIfMissing) {
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
@ -213,6 +265,8 @@ bool Epub::load(const bool buildIfMissing) {
// Try to load existing cache first
if (bookMetadataCache->load()) {
// Parse CSS files from loaded cache
parseCssFiles();
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true;
}
@ -309,6 +363,9 @@ bool Epub::load(const bool buildIfMissing) {
return false;
}
// Parse CSS files after cache reload
parseCssFiles();
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true;
}

View File

@ -8,6 +8,7 @@
#include <vector>
#include "Epub/BookMetadataCache.h"
#include "Epub/css/CssParser.h"
class ZipFile;
@ -24,11 +25,14 @@ class Epub {
std::string cachePath;
// Spine and TOC cache
std::unique_ptr<BookMetadataCache> bookMetadataCache;
// CSS parser for styling
std::unique_ptr<CssParser> cssParser;
bool findContentOpfFile(std::string* contentOpfFile) const;
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
bool parseTocNcxFile() const;
bool parseTocNavFile() const;
bool parseCssFiles();
public:
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
@ -64,4 +68,5 @@ class Epub {
size_t getBookSize() const;
float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
const CssParser* getCssParser() const { return cssParser.get(); }
};

View File

@ -9,7 +9,7 @@
#include "FsHelpers.h"
namespace {
constexpr uint8_t BOOK_CACHE_VERSION = 5;
constexpr uint8_t BOOK_CACHE_VERSION = 6;
constexpr char bookBinFile[] = "/book.bin";
constexpr char tmpSpineBinFile[] = "/spine.bin.tmp";
constexpr char tmpTocBinFile[] = "/toc.bin.tmp";
@ -115,9 +115,14 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
constexpr uint32_t headerASize =
sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount);
// Calculate CSS files size: count + each string (length + data)
uint32_t cssFilesSize = sizeof(uint16_t); // count
for (const auto& css : metadata.cssFiles) {
cssFilesSize += sizeof(uint32_t) + css.size();
}
const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.language.size() +
metadata.coverItemHref.size() + metadata.textReferenceHref.size() +
sizeof(uint32_t) * 5;
sizeof(uint32_t) * 5 + cssFilesSize;
const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount;
const uint32_t lutOffset = headerASize + metadataSize;
@ -132,6 +137,11 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
serialization::writeString(bookFile, metadata.language);
serialization::writeString(bookFile, metadata.coverItemHref);
serialization::writeString(bookFile, metadata.textReferenceHref);
// CSS files
serialization::writePod(bookFile, static_cast<uint16_t>(metadata.cssFiles.size()));
for (const auto& css : metadata.cssFiles) {
serialization::writeString(bookFile, css);
}
// Loop through spine entries, writing LUT positions
spineFile.seek(0);
@ -385,6 +395,16 @@ bool BookMetadataCache::load() {
serialization::readString(bookFile, coreMetadata.language);
serialization::readString(bookFile, coreMetadata.coverItemHref);
serialization::readString(bookFile, coreMetadata.textReferenceHref);
// CSS files
uint16_t cssCount;
serialization::readPod(bookFile, cssCount);
coreMetadata.cssFiles.clear();
coreMetadata.cssFiles.reserve(cssCount);
for (uint16_t i = 0; i < cssCount; i++) {
std::string cssPath;
serialization::readString(bookFile, cssPath);
coreMetadata.cssFiles.push_back(std::move(cssPath));
}
loaded = true;
Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);

View File

@ -14,6 +14,7 @@ class BookMetadataCache {
std::string language;
std::string coverItemHref;
std::string textReferenceHref;
std::vector<std::string> cssFiles;
};
struct SpineEntry {

View File

@ -49,11 +49,12 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s
} // namespace
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) {
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) {
if (word.empty()) return;
words.push_back(std::move(word));
wordStyles.push_back(fontStyle);
wordUnderlines.push_back(underline);
}
// Consumes data to minimize memory usage
@ -92,9 +93,21 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
auto wordsIt = words.begin();
auto wordStylesIt = wordStyles.begin();
bool isFirst = true;
while (wordsIt != words.end()) {
wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt));
uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt);
// Add CSS text-indent to first word width
if (isFirst && blockStyle.textIndent > 0 && (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) &&
!extraParagraphSpacing) {
width += static_cast<uint16_t>(blockStyle.textIndent);
isFirst = false;
} else {
isFirst = false;
}
wordWidths.push_back(width);
std::advance(wordsIt, 1);
std::advance(wordStylesIt, 1);
@ -200,7 +213,10 @@ void ParsedText::applyParagraphIndent() {
return;
}
if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) {
if (blockStyle.textIndent > 0) {
// CSS text-indent is handled via first word width adjustment
// We'll add the indent value directly to the first word's width
} else if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) {
words.front().insert(0, "\xe2\x80\x83");
}
}
@ -369,14 +385,18 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
// Iterators always start at the beginning as we are moving content with splice below
auto wordEndIt = words.begin();
auto wordStyleEndIt = wordStyles.begin();
auto wordUnderlineEndIt = wordUnderlines.begin();
std::advance(wordEndIt, lineWordCount);
std::advance(wordStyleEndIt, lineWordCount);
std::advance(wordUnderlineEndIt, lineWordCount);
// *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
std::list<std::string> lineWords;
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
std::list<EpdFontFamily::Style> lineWordStyles;
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
std::list<bool> lineWordUnderlines;
lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt);
for (auto& word : lineWords) {
if (containsSoftHyphen(word)) {
@ -384,5 +404,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
}
}
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style));
}
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style,
blockStyle, std::move(lineWordUnderlines)));
}

View File

@ -8,6 +8,7 @@
#include <string>
#include <vector>
#include "blocks/BlockStyle.h"
#include "blocks/TextBlock.h"
class GfxRenderer;
@ -15,7 +16,9 @@ class GfxRenderer;
class ParsedText {
std::list<std::string> words;
std::list<EpdFontFamily::Style> wordStyles;
std::list<bool> wordUnderlines; // Track underline per word
TextBlock::Style style;
BlockStyle blockStyle;
bool extraParagraphSpacing;
bool hyphenationEnabled;
@ -33,13 +36,18 @@ class ParsedText {
public:
explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing,
const bool hyphenationEnabled = false)
: style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {}
const bool hyphenationEnabled = false, const BlockStyle& blockStyle = BlockStyle())
: style(style),
blockStyle(blockStyle),
extraParagraphSpacing(extraParagraphSpacing),
hyphenationEnabled(hyphenationEnabled) {}
~ParsedText() = default;
void addWord(std::string word, EpdFontFamily::Style fontStyle);
void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false);
void setStyle(const TextBlock::Style style) { this->style = style; }
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
TextBlock::Style getStyle() const { return style; }
const BlockStyle& getBlockStyle() const { return blockStyle; }
size_t size() const { return words.size(); }
bool isEmpty() const { return words.empty(); }
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth,

View File

@ -8,7 +8,7 @@
#include "parsers/ChapterHtmlSlimParser.h"
namespace {
constexpr uint8_t SECTION_FILE_VERSION = 10;
constexpr uint8_t SECTION_FILE_VERSION = 11;
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) +
sizeof(uint32_t);
@ -186,8 +186,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
ChapterHtmlSlimParser visitor(
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
viewportHeight, hyphenationEnabled,
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
progressFn);
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn,
epub->getCssParser());
Hyphenator::setPreferredLanguage(epub->getLanguage());
success = visitor.parseAndBuildPages();

View File

@ -0,0 +1,17 @@
#pragma once
#include <cstdint>
/**
* BlockStyle - Block-level CSS properties for paragraphs
*
* Used to track margin/padding spacing and text indentation for block elements.
* Padding is treated similarly to margins for rendering purposes.
*/
struct BlockStyle {
int8_t marginTop = 0; // 0-2 lines
int8_t marginBottom = 0; // 0-2 lines
int8_t paddingTop = 0; // 0-2 lines (treated same as margin)
int8_t paddingBottom = 0; // 0-2 lines (treated same as margin)
int16_t textIndent = 0; // pixels
};

View File

@ -14,13 +14,40 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
auto wordIt = words.begin();
auto wordStylesIt = wordStyles.begin();
auto wordXposIt = wordXpos.begin();
auto wordUnderlineIt = wordUnderlines.begin();
for (size_t i = 0; i < words.size(); i++) {
renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt);
const int wordX = *wordXposIt + x;
renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt);
// Draw underline if word is underlined
if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) {
const std::string& w = *wordIt;
const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt);
// y is the top of the text line; add ascender to reach baseline, then offset 2px below
const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2;
int startX = wordX;
int underlineWidth = fullWordWidth;
// if word starts with em-space ("\xe2\x80\x83"), account for the additional indent before drawing the line
if (w.size() >= 3 && static_cast<uint8_t>(w[0]) == 0xE2 && static_cast<uint8_t>(w[1]) == 0x80 &&
static_cast<uint8_t>(w[2]) == 0x83) {
const char* visiblePtr = w.c_str() + 3;
const int prefixWidth = renderer.getIndentWidth(fontId, std::string("\xe2\x80\x83").c_str());
const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt);
startX = wordX + prefixWidth;
underlineWidth = visibleWidth;
}
renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true);
}
std::advance(wordIt, 1);
std::advance(wordStylesIt, 1);
std::advance(wordXposIt, 1);
if (wordUnderlineIt != wordUnderlines.end()) {
std::advance(wordUnderlineIt, 1);
}
}
}
@ -37,9 +64,35 @@ bool TextBlock::serialize(FsFile& file) const {
for (auto x : wordXpos) serialization::writePod(file, x);
for (auto s : wordStyles) serialization::writePod(file, s);
// Block style
// Underline flags (packed as bytes, 8 words per byte)
uint8_t underlineByte = 0;
int bitIndex = 0;
auto underlineIt = wordUnderlines.begin();
for (size_t i = 0; i < words.size(); i++) {
if (underlineIt != wordUnderlines.end() && *underlineIt) {
underlineByte |= 1 << bitIndex;
}
bitIndex++;
if (bitIndex == 8 || i == words.size() - 1) {
serialization::writePod(file, underlineByte);
underlineByte = 0;
bitIndex = 0;
}
if (underlineIt != wordUnderlines.end()) {
++underlineIt;
}
}
// Block style (alignment)
serialization::writePod(file, style);
// Block style (margins/padding/indent)
serialization::writePod(file, blockStyle.marginTop);
serialization::writePod(file, blockStyle.marginBottom);
serialization::writePod(file, blockStyle.paddingTop);
serialization::writePod(file, blockStyle.paddingBottom);
serialization::writePod(file, blockStyle.textIndent);
return true;
}
@ -48,7 +101,9 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
std::list<std::string> words;
std::list<uint16_t> wordXpos;
std::list<EpdFontFamily::Style> wordStyles;
std::list<bool> wordUnderlines;
Style style;
BlockStyle blockStyle;
// Word count
serialization::readPod(file, wc);
@ -67,8 +122,29 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
for (auto& x : wordXpos) serialization::readPod(file, x);
for (auto& s : wordStyles) serialization::readPod(file, s);
// Block style
// Underline flags (packed as bytes, 8 words per byte)
wordUnderlines.resize(wc, false);
auto underlineIt = wordUnderlines.begin();
const int bytesNeeded = (wc + 7) / 8;
for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) {
uint8_t underlineByte;
serialization::readPod(file, underlineByte);
for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) {
*underlineIt = (underlineByte & 1 << bit) != 0;
++underlineIt;
}
}
// Block style (alignment)
serialization::readPod(file, style);
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style));
// Block style (margins/padding/indent)
serialization::readPod(file, blockStyle.marginTop);
serialization::readPod(file, blockStyle.marginBottom);
serialization::readPod(file, blockStyle.paddingTop);
serialization::readPod(file, blockStyle.paddingBottom);
serialization::readPod(file, blockStyle.textIndent);
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style,
blockStyle, std::move(wordUnderlines)));
}

View File

@ -7,6 +7,7 @@
#include <string>
#include "Block.h"
#include "BlockStyle.h"
// Represents a line of text on a page
class TextBlock final : public Block {
@ -22,15 +23,30 @@ class TextBlock final : public Block {
std::list<std::string> words;
std::list<uint16_t> wordXpos;
std::list<EpdFontFamily::Style> wordStyles;
std::list<bool> wordUnderlines; // Track underline per word
Style style;
BlockStyle blockStyle;
public:
explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos,
std::list<EpdFontFamily::Style> word_styles, const Style style)
: words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {}
std::list<EpdFontFamily::Style> word_styles, const Style style,
const BlockStyle& blockStyle = BlockStyle(), std::list<bool> word_underlines = std::list<bool>())
: words(std::move(words)),
wordXpos(std::move(word_xpos)),
wordStyles(std::move(word_styles)),
wordUnderlines(std::move(word_underlines)),
style(style),
blockStyle(blockStyle) {
// Ensure underlines list matches words list size
while (this->wordUnderlines.size() < this->words.size()) {
this->wordUnderlines.push_back(false);
}
}
~TextBlock() override = default;
void setStyle(const Style style) { this->style = style; }
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
Style getStyle() const { return style; }
const BlockStyle& getBlockStyle() const { return blockStyle; }
bool isEmpty() override { return words.empty(); }
void layout(GfxRenderer& renderer) override {};
// given a renderer works out where to break the words into lines

View File

@ -0,0 +1,498 @@
#include "CssParser.h"
#include <HardwareSerial.h>
#include <algorithm>
#include <cctype>
namespace {
// Buffer size for reading CSS files
constexpr size_t READ_BUFFER_SIZE = 512;
// Maximum CSS file size we'll process (prevent memory issues)
constexpr size_t MAX_CSS_SIZE = 64 * 1024;
// Check if character is CSS whitespace
bool isCssWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; }
// Read entire file into string (with size limit)
std::string readFileContent(FsFile& file) {
std::string content;
content.reserve(std::min(static_cast<size_t>(file.size()), MAX_CSS_SIZE));
char buffer[READ_BUFFER_SIZE];
while (file.available() && content.size() < MAX_CSS_SIZE) {
const int bytesRead = file.read(buffer, sizeof(buffer));
if (bytesRead <= 0) break;
content.append(buffer, bytesRead);
}
return content;
}
// Remove CSS comments (/* ... */) from content
std::string stripComments(const std::string& css) {
std::string result;
result.reserve(css.size());
size_t pos = 0;
while (pos < css.size()) {
// Look for start of comment
if (pos + 1 < css.size() && css[pos] == '/' && css[pos + 1] == '*') {
// Find end of comment
const size_t endPos = css.find("*/", pos + 2);
if (endPos == std::string::npos) {
// Unterminated comment - skip rest of file
break;
}
pos = endPos + 2;
} else {
result.push_back(css[pos]);
++pos;
}
}
return result;
}
// Skip @-rules (like @media, @import, @font-face)
// Returns position after the @-rule
size_t skipAtRule(const std::string& css, const size_t start) {
// Find the end - either semicolon (simple @-rule) or matching brace
size_t pos = start + 1; // Skip the '@'
// Skip identifier
while (pos < css.size() && (std::isalnum(css[pos]) || css[pos] == '-')) {
++pos;
}
// Look for { or ;
int braceDepth = 0;
while (pos < css.size()) {
const char c = css[pos];
if (c == '{') {
++braceDepth;
} else if (c == '}') {
--braceDepth;
if (braceDepth == 0) {
return pos + 1;
}
} else if (c == ';' && braceDepth == 0) {
return pos + 1;
}
++pos;
}
return css.size();
}
// Extract next rule from CSS content
// Returns true if a rule was found, with selector and body filled
bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) {
selector.clear();
body.clear();
// Skip whitespace and @-rules until we find a regular rule
while (pos < css.size()) {
// Skip whitespace
while (pos < css.size() && isCssWhitespace(css[pos])) {
++pos;
}
if (pos >= css.size()) return false;
// Handle @-rules iteratively (avoids recursion/stack overflow)
if (css[pos] == '@') {
pos = skipAtRule(css, pos);
continue; // Try again after skipping the @-rule
}
break; // Found start of a regular rule
}
if (pos >= css.size()) return false;
// Find opening brace
const size_t bracePos = css.find('{', pos);
if (bracePos == std::string::npos) return false;
// Extract selector (everything before the brace)
selector = css.substr(pos, bracePos - pos);
// Find matching closing brace
int depth = 1;
const size_t bodyStart = bracePos + 1;
size_t bodyEnd = bodyStart;
while (bodyEnd < css.size() && depth > 0) {
if (css[bodyEnd] == '{')
++depth;
else if (css[bodyEnd] == '}')
--depth;
++bodyEnd;
}
// Extract body (between braces)
if (bodyEnd > bodyStart) {
body = css.substr(bodyStart, bodyEnd - bodyStart - 1);
}
pos = bodyEnd;
return true;
}
} // anonymous namespace
// String utilities implementation
std::string CssParser::normalized(const std::string& s) {
std::string result;
result.reserve(s.size());
bool inSpace = true; // Start true to skip leading space
for (const char c : s) {
if (isCssWhitespace(c)) {
if (!inSpace) {
result.push_back(' ');
inSpace = true;
}
} else {
result.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
inSpace = false;
}
}
// Remove trailing space
if (!result.empty() && result.back() == ' ') {
result.pop_back();
}
return result;
}
std::vector<std::string> CssParser::splitOnChar(const std::string& s, const char delimiter) {
std::vector<std::string> parts;
size_t start = 0;
for (size_t i = 0; i <= s.size(); ++i) {
if (i == s.size() || s[i] == delimiter) {
std::string part = s.substr(start, i - start);
std::string trimmed = normalized(part);
if (!trimmed.empty()) {
parts.push_back(trimmed);
}
start = i + 1;
}
}
return parts;
}
std::vector<std::string> CssParser::splitWhitespace(const std::string& s) {
std::vector<std::string> parts;
size_t start = 0;
bool inWord = false;
for (size_t i = 0; i <= s.size(); ++i) {
const bool isSpace = i == s.size() || isCssWhitespace(s[i]);
if (isSpace && inWord) {
parts.push_back(s.substr(start, i - start));
inWord = false;
} else if (!isSpace && !inWord) {
start = i;
inWord = true;
}
}
return parts;
}
// Property value interpreters
TextAlign CssParser::interpretAlignment(const std::string& val) {
const std::string v = normalized(val);
if (v == "left" || v == "start") return TextAlign::Left;
if (v == "right" || v == "end") return TextAlign::Right;
if (v == "center") return TextAlign::Center;
if (v == "justify") return TextAlign::Justify;
return TextAlign::None;
}
CssFontStyle CssParser::interpretFontStyle(const std::string& val) {
const std::string v = normalized(val);
if (v == "italic" || v == "oblique") return CssFontStyle::Italic;
return CssFontStyle::Normal;
}
CssFontWeight CssParser::interpretFontWeight(const std::string& val) {
const std::string v = normalized(val);
// Named values
if (v == "bold" || v == "bolder") return CssFontWeight::Bold;
if (v == "normal" || v == "lighter") return CssFontWeight::Normal;
// Numeric values: 100-900
// CSS spec: 400 = normal, 700 = bold
// We use: 0-400 = normal, 700+ = bold, 500-600 = normal (conservative)
char* endPtr = nullptr;
const long numericWeight = std::strtol(v.c_str(), &endPtr, 10);
// If we parsed a number and consumed the whole string
if (endPtr != v.c_str() && *endPtr == '\0') {
return numericWeight >= 700 ? CssFontWeight::Bold : CssFontWeight::Normal;
}
return CssFontWeight::Normal;
}
CssTextDecoration CssParser::interpretDecoration(const std::string& val) {
const std::string v = normalized(val);
// text-decoration can have multiple space-separated values
if (v.find("underline") != std::string::npos) {
return CssTextDecoration::Underline;
}
return CssTextDecoration::None;
}
float CssParser::interpretLength(const std::string& val, const float emSize) {
const std::string v = normalized(val);
if (v.empty()) return 0.0f;
// Determine unit and multiplier
float multiplier = 1.0f;
size_t unitStart = v.size();
// Find where the number ends
for (size_t i = 0; i < v.size(); ++i) {
const char c = v[i];
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') {
unitStart = i;
break;
}
}
const std::string numPart = v.substr(0, unitStart);
const std::string unitPart = v.substr(unitStart);
// Handle units
if (unitPart == "em" || unitPart == "rem") {
multiplier = emSize;
} else if (unitPart == "pt") {
multiplier = 1.33f; // Approximate pt to px conversion
}
// px is default (multiplier = 1.0)
char* endPtr = nullptr;
const float numericValue = std::strtof(numPart.c_str(), &endPtr);
if (endPtr == numPart.c_str()) return 0.0f; // No number parsed
return numericValue * multiplier;
}
int8_t CssParser::interpretSpacing(const std::string& val) {
const std::string v = normalized(val);
if (v.empty()) return 0;
// For spacing, we convert to "lines" (discrete units for e-ink)
// 1em ≈ 1 line, percentages based on ~30 lines per page
float multiplier = 0.0f;
size_t unitStart = v.size();
for (size_t i = 0; i < v.size(); ++i) {
const char c = v[i];
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+') {
unitStart = i;
break;
}
}
const std::string numPart = v.substr(0, unitStart);
const std::string unitPart = v.substr(unitStart);
if (unitPart == "em" || unitPart == "rem") {
multiplier = 1.0f; // 1em = 1 line
} else if (unitPart == "%") {
multiplier = 0.3f; // ~30 lines per page, so 10% = 3 lines
} else {
return 0; // Unsupported unit for spacing
}
char* endPtr = nullptr;
const float numericValue = std::strtof(numPart.c_str(), &endPtr);
if (endPtr == numPart.c_str()) return 0;
int lines = static_cast<int>(numericValue * multiplier);
// Clamp to reasonable range (0-2 lines)
if (lines < 0) lines = 0;
if (lines > 2) lines = 2;
return static_cast<int8_t>(lines);
}
// Declaration parsing
CssStyle CssParser::parseDeclarations(const std::string& declBlock) {
CssStyle style;
// Split declarations by semicolon
const auto declarations = splitOnChar(declBlock, ';');
for (const auto& decl : declarations) {
// Find colon separator
const size_t colonPos = decl.find(':');
if (colonPos == std::string::npos || colonPos == 0) continue;
std::string propName = normalized(decl.substr(0, colonPos));
std::string propValue = normalized(decl.substr(colonPos + 1));
if (propName.empty() || propValue.empty()) continue;
// Match property and set value
if (propName == "text-align") {
const TextAlign align = interpretAlignment(propValue);
if (align != TextAlign::None) {
style.alignment = align;
style.defined.alignment = 1;
}
} else if (propName == "font-style") {
style.fontStyle = interpretFontStyle(propValue);
style.defined.fontStyle = 1;
} else if (propName == "font-weight") {
style.fontWeight = interpretFontWeight(propValue);
style.defined.fontWeight = 1;
} else if (propName == "text-decoration" || propName == "text-decoration-line") {
style.decoration = interpretDecoration(propValue);
style.defined.decoration = 1;
} else if (propName == "text-indent") {
style.indentPixels = interpretLength(propValue);
style.defined.indent = 1;
} else if (propName == "margin-top") {
const int8_t spacing = interpretSpacing(propValue);
if (spacing > 0) {
style.marginTop = spacing;
style.defined.marginTop = 1;
}
} else if (propName == "margin-bottom") {
const int8_t spacing = interpretSpacing(propValue);
if (spacing > 0) {
style.marginBottom = spacing;
style.defined.marginBottom = 1;
}
} else if (propName == "padding-top") {
const int8_t spacing = interpretSpacing(propValue);
if (spacing > 0) {
style.paddingTop = spacing;
style.defined.paddingTop = 1;
}
} else if (propName == "padding-bottom") {
const int8_t spacing = interpretSpacing(propValue);
if (spacing > 0) {
style.paddingBottom = spacing;
style.defined.paddingBottom = 1;
}
}
}
return style;
}
// Rule processing
void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) {
const CssStyle style = parseDeclarations(declarations);
// Only store if any properties were set
if (!style.defined.anySet()) return;
// Handle comma-separated selectors
const auto selectors = splitOnChar(selectorGroup, ',');
for (const auto& sel : selectors) {
// Normalize the selector
std::string key = normalized(sel);
if (key.empty()) continue;
// Store or merge with existing
auto it = rulesBySelector_.find(key);
if (it != rulesBySelector_.end()) {
it->second.applyOver(style);
} else {
rulesBySelector_[key] = style;
}
}
}
// Main parsing entry point
bool CssParser::loadFromStream(FsFile& source) {
if (!source) {
Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis());
return false;
}
// Read file content
const std::string content = readFileContent(source);
if (content.empty()) {
return true; // Empty file is valid
}
// Remove comments
const std::string cleaned = stripComments(content);
// Parse rules
size_t pos = 0;
std::string selector, body;
while (extractNextRule(cleaned, pos, selector, body)) {
processRuleBlock(selector, body);
}
Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size());
return true;
}
// Style resolution
CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const {
CssStyle result;
const std::string tag = normalized(tagName);
// 1. Apply element-level style (lowest priority)
const auto tagIt = rulesBySelector_.find(tag);
if (tagIt != rulesBySelector_.end()) {
result.applyOver(tagIt->second);
}
// 2. Apply class styles (medium priority)
if (!classAttr.empty()) {
const auto classes = splitWhitespace(classAttr);
for (const auto& cls : classes) {
std::string classKey = "." + normalized(cls);
auto classIt = rulesBySelector_.find(classKey);
if (classIt != rulesBySelector_.end()) {
result.applyOver(classIt->second);
}
}
// 3. Apply element.class styles (higher priority)
for (const auto& cls : classes) {
std::string combinedKey = tag + "." + normalized(cls);
auto combinedIt = rulesBySelector_.find(combinedKey);
if (combinedIt != rulesBySelector_.end()) {
result.applyOver(combinedIt->second);
}
}
}
return result;
}
// Inline style parsing (static - doesn't need rule database)
CssStyle CssParser::parseInlineStyle(const std::string& styleValue) { return parseDeclarations(styleValue); }

View File

@ -0,0 +1,99 @@
#pragma once
#include <SdFat.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "CssStyle.h"
/**
* Lightweight CSS parser for EPUB stylesheets
*
* Parses CSS files and extracts styling information relevant for e-ink display.
* Uses a two-phase approach: first tokenizes the CSS content, then builds
* a rule database that can be queried during HTML parsing.
*
* Supported selectors:
* - Element selectors: p, div, h1, etc.
* - Class selectors: .classname
* - Combined: element.classname
* - Grouped: selector1, selector2 { }
*
* Not supported (silently ignored):
* - Descendant/child selectors
* - Pseudo-classes and pseudo-elements
* - Media queries (content is skipped)
* - @import, @font-face, etc.
*/
class CssParser {
public:
CssParser() = default;
~CssParser() = default;
// Non-copyable
CssParser(const CssParser&) = delete;
CssParser& operator=(const CssParser&) = delete;
/**
* Load and parse CSS from a file stream.
* Can be called multiple times to accumulate rules from multiple stylesheets.
* @param source Open file handle to read from
* @return true if parsing completed (even if no rules found)
*/
bool loadFromStream(FsFile& source);
/**
* Look up the style for an HTML element, considering tag name and class attributes.
* Applies CSS cascade: element style < class style < element.class style
*
* @param tagName The HTML element name (e.g., "p", "div")
* @param classAttr The class attribute value (may contain multiple space-separated classes)
* @return Combined style with all applicable rules merged
*/
[[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const;
/**
* Parse an inline style attribute string.
* @param styleValue The value of a style="" attribute
* @return Parsed style properties
*/
[[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue);
/**
* Check if any rules have been loaded
*/
[[nodiscard]] bool empty() const { return rulesBySelector_.empty(); }
/**
* Get count of loaded rule sets
*/
[[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); }
/**
* Clear all loaded rules
*/
void clear() { rulesBySelector_.clear(); }
private:
// Storage: maps normalized selector -> style properties
std::unordered_map<std::string, CssStyle> rulesBySelector_;
// Internal parsing helpers
void processRuleBlock(const std::string& selectorGroup, const std::string& declarations);
static CssStyle parseDeclarations(const std::string& declBlock);
// Individual property value parsers
static TextAlign interpretAlignment(const std::string& val);
static CssFontStyle interpretFontStyle(const std::string& val);
static CssFontWeight interpretFontWeight(const std::string& val);
static CssTextDecoration interpretDecoration(const std::string& val);
static float interpretLength(const std::string& val, float emSize = 16.0f);
static int8_t interpretSpacing(const std::string& val);
// String utilities
static std::string normalized(const std::string& s);
static std::vector<std::string> splitOnChar(const std::string& s, char delimiter);
static std::vector<std::string> splitWhitespace(const std::string& s);
};

View File

@ -0,0 +1,133 @@
#pragma once
#include <cstdint>
// Text alignment options matching CSS text-align property
enum class TextAlign : uint8_t { None = 0, Left = 1, Right = 2, Center = 3, Justify = 4 };
// Font style options matching CSS font-style property
enum class CssFontStyle : uint8_t { Normal = 0, Italic = 1 };
// Font weight options - CSS supports 100-900, we simplify to normal/bold
enum class CssFontWeight : uint8_t { Normal = 0, Bold = 1 };
// Text decoration options
enum class CssTextDecoration : uint8_t { None = 0, Underline = 1 };
// Bitmask for tracking which properties have been explicitly set
struct CssPropertyFlags {
uint16_t alignment : 1;
uint16_t fontStyle : 1;
uint16_t fontWeight : 1;
uint16_t decoration : 1;
uint16_t indent : 1;
uint16_t marginTop : 1;
uint16_t marginBottom : 1;
uint16_t paddingTop : 1;
uint16_t paddingBottom : 1;
uint16_t reserved : 7;
CssPropertyFlags()
: alignment(0),
fontStyle(0),
fontWeight(0),
decoration(0),
indent(0),
marginTop(0),
marginBottom(0),
paddingTop(0),
paddingBottom(0),
reserved(0) {}
[[nodiscard]] bool anySet() const {
return alignment || fontStyle || fontWeight || decoration || indent || marginTop || marginBottom || paddingTop ||
paddingBottom;
}
void clearAll() {
alignment = fontStyle = fontWeight = decoration = indent = 0;
marginTop = marginBottom = paddingTop = paddingBottom = 0;
}
};
// Represents a collection of CSS style properties
// Only stores properties relevant to e-ink text rendering
struct CssStyle {
TextAlign alignment = TextAlign::None;
CssFontStyle fontStyle = CssFontStyle::Normal;
CssFontWeight fontWeight = CssFontWeight::Normal;
CssTextDecoration decoration = CssTextDecoration::None;
float indentPixels = 0.0f; // First-line indent in pixels
int8_t marginTop = 0; // Vertical spacing before block (in lines, 0-2)
int8_t marginBottom = 0; // Vertical spacing after block (in lines, 0-2)
int8_t paddingTop = 0; // Padding before (in lines, 0-2)
int8_t paddingBottom = 0; // Padding after (in lines, 0-2)
CssPropertyFlags defined; // Tracks which properties were explicitly set
// Apply properties from another style, only overwriting if the other style
// has that property explicitly defined
void applyOver(const CssStyle& base) {
if (base.defined.alignment) {
alignment = base.alignment;
defined.alignment = 1;
}
if (base.defined.fontStyle) {
fontStyle = base.fontStyle;
defined.fontStyle = 1;
}
if (base.defined.fontWeight) {
fontWeight = base.fontWeight;
defined.fontWeight = 1;
}
if (base.defined.decoration) {
decoration = base.decoration;
defined.decoration = 1;
}
if (base.defined.indent) {
indentPixels = base.indentPixels;
defined.indent = 1;
}
if (base.defined.marginTop) {
marginTop = base.marginTop;
defined.marginTop = 1;
}
if (base.defined.marginBottom) {
marginBottom = base.marginBottom;
defined.marginBottom = 1;
}
if (base.defined.paddingTop) {
paddingTop = base.paddingTop;
defined.paddingTop = 1;
}
if (base.defined.paddingBottom) {
paddingBottom = base.paddingBottom;
defined.paddingBottom = 1;
}
}
// Compatibility accessors for existing code that uses hasX pattern
[[nodiscard]] bool hasTextAlign() const { return defined.alignment; }
[[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; }
[[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; }
[[nodiscard]] bool hasTextDecoration() const { return defined.decoration; }
[[nodiscard]] bool hasTextIndent() const { return defined.indent; }
[[nodiscard]] bool hasMarginTop() const { return defined.marginTop; }
[[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; }
[[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; }
[[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; }
// Merge another style (alias for applyOver for compatibility)
void merge(const CssStyle& other) { applyOver(other); }
void reset() {
alignment = TextAlign::None;
fontStyle = CssFontStyle::Normal;
fontWeight = CssFontWeight::Normal;
decoration = CssTextDecoration::None;
indentPixels = 0.0f;
marginTop = marginBottom = paddingTop = paddingBottom = 0;
defined.clearAll();
}
};

View File

@ -22,6 +22,9 @@ constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
const char* ITALIC_TAGS[] = {"i", "em"};
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
const char* UNDERLINE_TAGS[] = {"u", "ins"};
constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]);
const char* IMAGE_TAGS[] = {"img"};
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
@ -40,37 +43,78 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
return false;
}
// Create a BlockStyle from CSS style properties
BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle) {
BlockStyle blockStyle;
blockStyle.marginTop = static_cast<int8_t>(cssStyle.marginTop + cssStyle.paddingTop);
blockStyle.marginBottom = static_cast<int8_t>(cssStyle.marginBottom + cssStyle.paddingBottom);
blockStyle.paddingTop = cssStyle.paddingTop;
blockStyle.paddingBottom = cssStyle.paddingBottom;
blockStyle.textIndent = static_cast<int16_t>(cssStyle.indentPixels);
return blockStyle;
}
// Update effective bold/italic/underline based on block style and inline style stack
void ChapterHtmlSlimParser::updateEffectiveInlineStyle() {
// Start with block-level styles
effectiveBold = currentBlockStyle.hasFontWeight() && currentBlockStyle.fontWeight == CssFontWeight::Bold;
effectiveItalic = currentBlockStyle.hasFontStyle() && currentBlockStyle.fontStyle == CssFontStyle::Italic;
effectiveUnderline =
currentBlockStyle.hasTextDecoration() && currentBlockStyle.decoration == CssTextDecoration::Underline;
// Apply inline style stack in order
for (const auto& entry : inlineStyleStack) {
if (entry.hasBold) {
effectiveBold = entry.bold;
}
if (entry.hasItalic) {
effectiveItalic = entry.italic;
}
if (entry.hasUnderline) {
effectiveUnderline = entry.underline;
}
}
}
// flush the contents of partWordBuffer to currentTextBlock
void ChapterHtmlSlimParser::flushPartWordBuffer() {
// determine font style
// Determine font style from depth-based tracking and CSS effective style
const bool isBold = boldUntilDepth < depth || effectiveBold;
const bool isItalic = italicUntilDepth < depth || effectiveItalic;
const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline;
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (boldUntilDepth < depth && italicUntilDepth < depth) {
if (isBold && isItalic) {
fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (boldUntilDepth < depth) {
} else if (isBold) {
fontStyle = EpdFontFamily::BOLD;
} else if (italicUntilDepth < depth) {
} else if (isItalic) {
fontStyle = EpdFontFamily::ITALIC;
}
// flush the buffer
partWordBuffer[partWordBufferIndex] = '\0';
currentTextBlock->addWord(partWordBuffer, fontStyle);
currentTextBlock->addWord(partWordBuffer, fontStyle, isUnderline);
partWordBufferIndex = 0;
}
// start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) {
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) {
currentTextBlock->setStyle(style);
currentTextBlock->setBlockStyle(blockStyle);
return;
}
makePages();
}
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled));
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled, blockStyle));
}
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); }
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
@ -80,6 +124,19 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
return;
}
// Extract class and style attributes for CSS processing
std::string classAttr;
std::string styleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "class") == 0) {
classAttr = atts[i + 1];
} else if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
}
}
}
// Special handling for tables - show placeholder text instead of dropping silently
if (strcmp(name, "table") == 0) {
// Add placeholder text
@ -141,43 +198,155 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
}
}
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->startNewTextBlock(TextBlock::CENTER_ALIGN);
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->depth += 1;
return;
// Determine if this is a block element
bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
// Compute CSS style for this element
CssStyle cssStyle;
if (self->cssParser) {
// Get combined tag + class styles
cssStyle = self->cssParser->resolveStyle(name, classAttr);
// Merge inline style (highest priority)
if (!styleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
cssStyle.merge(inlineStyle);
}
}
if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
// Headers: center aligned, bold, apply CSS overrides
TextBlock::Style alignment = TextBlock::CENTER_ALIGN;
if (cssStyle.hasTextAlign()) {
switch (cssStyle.alignment) {
case TextAlign::Left:
alignment = TextBlock::LEFT_ALIGN;
break;
case TextAlign::Right:
alignment = TextBlock::RIGHT_ALIGN;
break;
case TextAlign::Center:
alignment = TextBlock::CENTER_ALIGN;
break;
case TextAlign::Justify:
alignment = TextBlock::JUSTIFIED;
break;
default:
break;
}
}
self->currentBlockStyle = cssStyle;
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle));
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->updateEffectiveInlineStyle();
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (strcmp(name, "br") == 0) {
if (self->partWordBufferIndex > 0) {
// flush word preceding <br/> to currentTextBlock before calling startNewTextBlock
self->flushPartWordBuffer();
}
self->startNewTextBlock(self->currentTextBlock->getStyle());
self->depth += 1;
return;
} else {
// Determine alignment from CSS or default
auto alignment = static_cast<TextBlock::Style>(self->paragraphAlignment);
if (cssStyle.hasTextAlign()) {
switch (cssStyle.alignment) {
case TextAlign::Left:
alignment = TextBlock::LEFT_ALIGN;
break;
case TextAlign::Right:
alignment = TextBlock::RIGHT_ALIGN;
break;
case TextAlign::Center:
alignment = TextBlock::CENTER_ALIGN;
break;
case TextAlign::Justify:
alignment = TextBlock::JUSTIFIED;
break;
default:
break;
}
}
self->currentBlockStyle = cssStyle;
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle));
self->updateEffectiveInlineStyle();
if (strcmp(name, "li") == 0) {
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR);
}
}
self->startNewTextBlock(static_cast<TextBlock::Style>(self->paragraphAlignment));
if (strcmp(name, "li") == 0) {
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR);
} else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) {
self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
// Push inline style entry for underline tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasUnderline = true;
entry.underline = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
self->depth += 1;
return;
}
if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->depth += 1;
return;
}
if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
// Push inline style entry for bold tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasBold = true;
entry.bold = true;
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
self->depth += 1;
return;
// Push inline style entry for italic tag
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasItalic = true;
entry.italic = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (strcmp(name, "span") == 0 || !isBlockElement) {
// Handle span and other inline elements for CSS styling
if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) {
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
}
}
// Unprocessed tag, just increasing depth and continue forward
@ -239,17 +408,27 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
if (self->partWordBufferIndex > 0) {
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file.
// We don't want to flush out content when closing inline tags like <span>.
// Currently this also flushes out on closing <b> and <i> tags, but they are line tags so that shouldn't happen,
// text styling needs to be overhauled to fix it.
const bool shouldBreakText =
matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) ||
matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
// Check if any style state will change after we decrement depth
// If so, we MUST flush the partWordBuffer with the CURRENT style first
// Note: depth hasn't been decremented yet, so we check against (depth - 1)
const bool willPopStyleStack =
!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1;
const bool willClearBold = self->boldUntilDepth == self->depth - 1;
const bool willClearItalic = self->italicUntilDepth == self->depth - 1;
const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1;
if (shouldBreakText) {
const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
// Flush buffer with current style BEFORE any style changes
if (self->partWordBufferIndex > 0) {
// Flush if style will change OR if we're closing a block/structural element
const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) ||
matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) ||
matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 ||
matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
if (shouldFlush) {
self->flushPartWordBuffer();
}
}
@ -261,15 +440,33 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
self->skipUntilDepth = INT_MAX;
}
// Leaving bold
// Leaving bold tag
if (self->boldUntilDepth == self->depth) {
self->boldUntilDepth = INT_MAX;
}
// Leaving italic
// Leaving italic tag
if (self->italicUntilDepth == self->depth) {
self->italicUntilDepth = INT_MAX;
}
// Leaving underline tag
if (self->underlineUntilDepth == self->depth) {
self->underlineUntilDepth = INT_MAX;
}
// Pop from inline style stack if we pushed an entry at this depth
// This handles all inline elements: b, i, u, span, etc.
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
}
// Clear block style when leaving block elements
if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->currentBlockStyle.reset();
self->updateEffectiveInlineStyle();
}
}
bool ChapterHtmlSlimParser::parseAndBuildPages() {
@ -389,10 +586,23 @@ void ChapterHtmlSlimParser::makePages() {
}
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
// Apply marginTop before the paragraph
const BlockStyle& blockStyle = currentTextBlock->getBlockStyle();
if (blockStyle.marginTop > 0) {
currentPageNextY += lineHeight * blockStyle.marginTop;
}
currentTextBlock->layoutAndExtractLines(
renderer, fontId, viewportWidth,
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
// Extra paragraph spacing if enabled
// Apply marginBottom after the paragraph
if (blockStyle.marginBottom > 0) {
currentPageNextY += lineHeight * blockStyle.marginBottom;
}
// Extra paragraph spacing if enabled (default behavior)
if (extraParagraphSpacing) {
currentPageNextY += lineHeight / 2;
}

View File

@ -8,6 +8,8 @@
#include "../ParsedText.h"
#include "../blocks/TextBlock.h"
#include "../css/CssParser.h"
#include "../css/CssStyle.h"
class Page;
class GfxRenderer;
@ -23,6 +25,7 @@ class ChapterHtmlSlimParser {
int skipUntilDepth = INT_MAX;
int boldUntilDepth = INT_MAX;
int italicUntilDepth = INT_MAX;
int underlineUntilDepth = INT_MAX;
// buffer for building up words from characters, will auto break if longer than this
// leave one char at end for null pointer
char partWordBuffer[MAX_WORD_SIZE + 1] = {};
@ -37,7 +40,23 @@ class ChapterHtmlSlimParser {
uint16_t viewportWidth;
uint16_t viewportHeight;
bool hyphenationEnabled;
const CssParser* cssParser;
// Style tracking (replaces depth-based approach)
struct StyleStackEntry {
int depth = 0;
bool hasBold = false, bold = false;
bool hasItalic = false, italic = false;
bool hasUnderline = false, underline = false;
};
std::vector<StyleStackEntry> inlineStyleStack;
CssStyle currentBlockStyle;
bool effectiveBold = false;
bool effectiveItalic = false;
bool effectiveUnderline = false;
void updateEffectiveInlineStyle();
void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle);
void startNewTextBlock(TextBlock::Style style);
void flushPartWordBuffer();
void makePages();
@ -52,7 +71,8 @@ class ChapterHtmlSlimParser {
const uint8_t paragraphAlignment, const uint16_t viewportWidth,
const uint16_t viewportHeight, const bool hyphenationEnabled,
const std::function<void(std::unique_ptr<Page>)>& completePageFn,
const std::function<void(int)>& progressFn = nullptr)
const std::function<void(int)>& progressFn = nullptr,
const CssParser* cssParser = nullptr)
: filepath(filepath),
renderer(renderer),
fontId(fontId),
@ -63,7 +83,8 @@ class ChapterHtmlSlimParser {
viewportHeight(viewportHeight),
hyphenationEnabled(hyphenationEnabled),
completePageFn(completePageFn),
progressFn(progressFn) {}
progressFn(progressFn),
cssParser(cssParser) {}
~ChapterHtmlSlimParser() = default;
bool parseAndBuildPages();
void addLineToPage(std::shared_ptr<TextBlock> line);

View File

@ -8,6 +8,7 @@
namespace {
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
constexpr char MEDIA_TYPE_CSS[] = "text/css";
constexpr char itemCacheFile[] = "/.items.bin";
} // namespace
@ -218,6 +219,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
}
}
// Collect CSS files
if (mediaType == MEDIA_TYPE_CSS) {
self->cssFiles.push_back(href);
}
// EPUB 3: Check for nav document (properties contains "nav")
if (!properties.empty() && self->tocNavPath.empty()) {
// Properties is space-separated, check if "nav" is present as a word

View File

@ -64,6 +64,7 @@ class ContentOpfParser final : public Print {
std::string tocNavPath; // EPUB 3 nav document path
std::string coverItemHref;
std::string textReferenceHref;
std::vector<std::string> cssFiles; // CSS stylesheet paths
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
BookMetadataCache* cache)

View File

@ -462,6 +462,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const {
return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX;
}
int GfxRenderer::getIndentWidth(const int fontId, const char* text) const {
if (fontMap.count(fontId) == 0) {
Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);
return 0;
}
uint32_t cp;
int width = 0;
while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&text)))) {
width += fontMap.at(fontId).getGlyph(cp, EpdFontFamily::REGULAR)->advanceX;
}
return width;
}
int GfxRenderer::getFontAscenderSize(const int fontId) const {
if (fontMap.count(fontId) == 0) {
Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);

View File

@ -78,6 +78,7 @@ class GfxRenderer {
void drawText(int fontId, int x, int y, const char* text, bool black = true,
EpdFontFamily::Style style = EpdFontFamily::REGULAR) const;
int getSpaceWidth(int fontId) const;
int getIndentWidth(int fontId, const char* text) const;
int getFontAscenderSize(int fontId) const;
int getLineHeight(int fontId) const;
std::string truncatedText(int fontId, const char* text, int maxWidth,