Compare commits

...

15 Commits

Author SHA1 Message Date
Jake Kenneally
8764c229eb
Merge d445eb0bb0 into f935b59a41 2026-02-01 18:36:29 +11:00
Jake Kenneally
d445eb0bb0 fix formatting 2026-01-31 15:14:51 -05:00
Jake Kenneally
394fc41819 add quotes to punctuation list 2026-01-31 15:11:22 -05:00
Jake Kenneally
a6d6e5e770 fix some styling edge cases: preserving spacing from parent elements, removing stray spaces after italicized text in child elements 2026-01-31 14:46:20 -05:00
Jake Kenneally
6796989247 calculate em based on font line height 2026-01-31 14:19:28 -05:00
Jake Kenneally
9dac5bf27e improve CSS margin, padding, and text-indent parsing
- margin, padding, and text-indent now all support ems, rems, and px values
- shorthand margin/padding CSS is also supported
- margin/padding/indent values of 0 should no longer erroneously produce additional spacing
2026-01-29 20:05:12 -05:00
Jake Kenneally
a41d0f04d5 formatting: run clang-format-fix 2026-01-27 20:25:02 -05:00
Jake Kenneally
834440aab4 Merge branch 'master' into feature/add-epub-css-parsing
* master: (33 commits)
  feat: add HalDisplay and HalGPIO (#522)
  feat: Display epub metadata on Recents (#511)
  chore: Cut release 0.16.0
  fix: Correctly render italics on image alt placeholders (#569)
  chore: .gitignore: add compile_commands.json & .cache (#568)
  fix: Render keyboard entry over multiple lines (#567)
  fix: missing front layout in mapLabels() (#564)
  refactor: Re-work for OTA feature (#509)
  perf: optimize large EPUB indexing from O(n^2) to O(n) (#458)
  feat: Add Spanish hyphenation support (#558)
  feat: Add support to B&W filters to image covers (#476)
  feat(ux): page turning on button pressed if long-press chapter skip is disabled (#451)
  feat: Add status bar option "Full w/ Progress Bar" (#438)
  fix: Validate settings on read. (#492)
  fix: rotate origin in drawImage (#557)
  feat: Extract author from XTC/XTCH files (#563)
  fix: add txt books to recent tab (#526)
  docs: add font generation commands to builtin font headers (#547)
  docs: Update README with supported languages for EPUB  (#530)
  fix: Fix KOReader document md5 calculation for binary matching progress sync (#529)
  ...
2026-01-27 20:24:38 -05:00
Jake Kenneally
8d7c7a5dbb Merge branch 'master' into feature/add-epub-css-parsing
* master:
  chore: Cut release 0.15.0
  fix: OPDS browser OOM (#403)
  docs: Add detailed webserver documentation (#446)
  feat: invalidate cache on web uploads and opds downloads and add Clear Cache action (#393)
  fix: hard reset via RTS pin after flashing firmware (#437)
  fix: Skip negative screen coordinates only after we read the bitmap row. (#431)
  Reclaim space if we don't show battery Percentage (#352)
  feat: Include superscripts and subscripts in fonts (#463)
  My Library: Tab bar w/ Recent Books + File Browser (#250)
  feat: adding categories to settings screen (#331)
2026-01-23 10:02:53 -06:00
Jake Kenneally
8f3d226bf3 increment versions to prevent error when opening cached EPUBs 2026-01-20 10:27:55 -06:00
Jake Kenneally
5c9412b141 fix compilation errors 2026-01-19 23:09:35 -06:00
Jake Kenneally
750a6ee1d8 rerun clang-format 2026-01-19 22:39:40 -06:00
Jake Kenneally
be2de1123b Merge remote-tracking branch 'origin' into feature/add-epub-css-parsing
* origin:
  fix: truncate chapter names that are too long (#422)
  feat: dict based Hyphenation (#305)
  fix: render U+FFFD replacement character instead of ? (#366)
  fix: Invert colors on home screen cover overlay when recent book is selected (#390)
  Adds KOReader Sync support (#232)
  feat: Change keyboard "caps" to "shift" & Wrap Keyboard (#377)
  fix: XTC 1-bit thumb BMP polarity inversion (#373)
2026-01-19 22:37:37 -06:00
Jake Kenneally
be10b90a71 formatting: run clang-format-fix 2026-01-17 18:35:44 -05:00
Jake Kenneally
94ce987f2c feat: Add CSS parsing and CSS support in EPUBs 2026-01-17 17:57:04 -05:00
19 changed files with 1521 additions and 89 deletions

View File

@ -86,6 +86,9 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
tocNavItem = opfParser.tocNavPath; tocNavItem = opfParser.tocNavPath;
} }
// Copy CSS files to metadata
bookMetadata.cssFiles = opfParser.cssFiles;
Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis()); Serial.printf("[%lu] [EBP] Successfully parsed content.opf\n", millis());
return true; return true;
} }
@ -204,6 +207,55 @@ bool Epub::parseTocNavFile() const {
return true; return true;
} }
// Creates the CssParser for this book and feeds it every stylesheet listed in the
// metadata cache.
//
// A parser is always created, even when the book lists no stylesheets, because it is
// also needed for inline style parsing. Each stylesheet is copied out of the EPUB into
// a scratch file in the cache directory, parsed from there, and the scratch file is
// removed again. A stylesheet that cannot be extracted or reopened is logged and
// skipped; it does not fail the call.
//
// Returns false only when the metadata cache is missing or not loaded, true otherwise.
bool Epub::parseCssFiles() {
  if (!bookMetadataCache || !bookMetadataCache->isLoaded()) {
    Serial.printf("[%lu] [EBP] Cannot parse CSS, cache not loaded\n", millis());
    return false;
  }

  // Always create CssParser - needed for inline style parsing even without CSS files
  cssParser.reset(new CssParser());

  const auto& cssFiles = bookMetadataCache->coreMetadata.cssFiles;
  if (cssFiles.empty()) {
    Serial.printf("[%lu] [EBP] No CSS files to parse, but CssParser created for inline styles\n", millis());
    return true;
  }

  // Every stylesheet is staged through the same scratch file, so build its path once
  // instead of re-allocating the string on each iteration.
  const auto tmpCssPath = getCachePath() + "/.tmp.css";

  for (const auto& cssPath : cssFiles) {
    Serial.printf("[%lu] [EBP] Parsing CSS file: %s\n", millis(), cssPath.c_str());

    // Extract CSS file to temp location
    FsFile tempCssFile;
    if (!SdMan.openFileForWrite("EBP", tmpCssPath, tempCssFile)) {
      Serial.printf("[%lu] [EBP] Could not create temp CSS file\n", millis());
      continue;
    }
    if (!readItemContentsToStream(cssPath, tempCssFile, 1024)) {
      Serial.printf("[%lu] [EBP] Could not read CSS file: %s\n", millis(), cssPath.c_str());
      // Do not leave a partially written scratch file behind.
      tempCssFile.close();
      SdMan.remove(tmpCssPath.c_str());
      continue;
    }
    tempCssFile.close();

    // Parse the CSS file
    if (!SdMan.openFileForRead("EBP", tmpCssPath, tempCssFile)) {
      Serial.printf("[%lu] [EBP] Could not open temp CSS file for reading\n", millis());
      SdMan.remove(tmpCssPath.c_str());
      continue;
    }
    cssParser->loadFromStream(tempCssFile);
    tempCssFile.close();
    SdMan.remove(tmpCssPath.c_str());
  }

  // Note: the file count logged here is the number of stylesheets listed, including
  // any that were skipped above.
  Serial.printf("[%lu] [EBP] Loaded %zu CSS style rules from %zu files\n", millis(), cssParser->ruleCount(),
                cssFiles.size());
  return true;
}
// load in the meta data for the epub file // load in the meta data for the epub file
bool Epub::load(const bool buildIfMissing) { bool Epub::load(const bool buildIfMissing) {
Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str()); Serial.printf("[%lu] [EBP] Loading ePub: %s\n", millis(), filepath.c_str());
@ -213,6 +265,8 @@ bool Epub::load(const bool buildIfMissing) {
// Try to load existing cache first // Try to load existing cache first
if (bookMetadataCache->load()) { if (bookMetadataCache->load()) {
// Parse CSS files from loaded cache
parseCssFiles();
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true; return true;
} }
@ -309,6 +363,9 @@ bool Epub::load(const bool buildIfMissing) {
return false; return false;
} }
// Parse CSS files after cache reload
parseCssFiles();
Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str()); Serial.printf("[%lu] [EBP] Loaded ePub: %s\n", millis(), filepath.c_str());
return true; return true;
} }

View File

@ -8,6 +8,7 @@
#include <vector> #include <vector>
#include "Epub/BookMetadataCache.h" #include "Epub/BookMetadataCache.h"
#include "Epub/css/CssParser.h"
class ZipFile; class ZipFile;
@ -24,11 +25,14 @@ class Epub {
std::string cachePath; std::string cachePath;
// Spine and TOC cache // Spine and TOC cache
std::unique_ptr<BookMetadataCache> bookMetadataCache; std::unique_ptr<BookMetadataCache> bookMetadataCache;
// CSS parser for styling
std::unique_ptr<CssParser> cssParser;
bool findContentOpfFile(std::string* contentOpfFile) const; bool findContentOpfFile(std::string* contentOpfFile) const;
bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata); bool parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata);
bool parseTocNcxFile() const; bool parseTocNcxFile() const;
bool parseTocNavFile() const; bool parseTocNavFile() const;
bool parseCssFiles();
public: public:
explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) { explicit Epub(std::string filepath, const std::string& cacheDir) : filepath(std::move(filepath)) {
@ -64,4 +68,5 @@ class Epub {
size_t getBookSize() const; size_t getBookSize() const;
float calculateProgress(int currentSpineIndex, float currentSpineRead) const; float calculateProgress(int currentSpineIndex, float currentSpineRead) const;
const CssParser* getCssParser() const { return cssParser.get(); }
}; };

View File

@ -9,7 +9,7 @@
#include "FsHelpers.h" #include "FsHelpers.h"
namespace { namespace {
constexpr uint8_t BOOK_CACHE_VERSION = 5; constexpr uint8_t BOOK_CACHE_VERSION = 6;
constexpr char bookBinFile[] = "/book.bin"; constexpr char bookBinFile[] = "/book.bin";
constexpr char tmpSpineBinFile[] = "/spine.bin.tmp"; constexpr char tmpSpineBinFile[] = "/spine.bin.tmp";
constexpr char tmpTocBinFile[] = "/toc.bin.tmp"; constexpr char tmpTocBinFile[] = "/toc.bin.tmp";
@ -115,9 +115,14 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
constexpr uint32_t headerASize = constexpr uint32_t headerASize =
sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount); sizeof(BOOK_CACHE_VERSION) + /* LUT Offset */ sizeof(uint32_t) + sizeof(spineCount) + sizeof(tocCount);
// Calculate CSS files size: count + each string (length + data)
uint32_t cssFilesSize = sizeof(uint16_t); // count
for (const auto& css : metadata.cssFiles) {
cssFilesSize += sizeof(uint32_t) + css.size();
}
const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.language.size() + const uint32_t metadataSize = metadata.title.size() + metadata.author.size() + metadata.language.size() +
metadata.coverItemHref.size() + metadata.textReferenceHref.size() + metadata.coverItemHref.size() + metadata.textReferenceHref.size() +
sizeof(uint32_t) * 5; sizeof(uint32_t) * 5 + cssFilesSize;
const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount; const uint32_t lutSize = sizeof(uint32_t) * spineCount + sizeof(uint32_t) * tocCount;
const uint32_t lutOffset = headerASize + metadataSize; const uint32_t lutOffset = headerASize + metadataSize;
@ -132,6 +137,11 @@ bool BookMetadataCache::buildBookBin(const std::string& epubPath, const BookMeta
serialization::writeString(bookFile, metadata.language); serialization::writeString(bookFile, metadata.language);
serialization::writeString(bookFile, metadata.coverItemHref); serialization::writeString(bookFile, metadata.coverItemHref);
serialization::writeString(bookFile, metadata.textReferenceHref); serialization::writeString(bookFile, metadata.textReferenceHref);
// CSS files
serialization::writePod(bookFile, static_cast<uint16_t>(metadata.cssFiles.size()));
for (const auto& css : metadata.cssFiles) {
serialization::writeString(bookFile, css);
}
// Loop through spine entries, writing LUT positions // Loop through spine entries, writing LUT positions
spineFile.seek(0); spineFile.seek(0);
@ -385,6 +395,16 @@ bool BookMetadataCache::load() {
serialization::readString(bookFile, coreMetadata.language); serialization::readString(bookFile, coreMetadata.language);
serialization::readString(bookFile, coreMetadata.coverItemHref); serialization::readString(bookFile, coreMetadata.coverItemHref);
serialization::readString(bookFile, coreMetadata.textReferenceHref); serialization::readString(bookFile, coreMetadata.textReferenceHref);
// CSS files
uint16_t cssCount;
serialization::readPod(bookFile, cssCount);
coreMetadata.cssFiles.clear();
coreMetadata.cssFiles.reserve(cssCount);
for (uint16_t i = 0; i < cssCount; i++) {
std::string cssPath;
serialization::readString(bookFile, cssPath);
coreMetadata.cssFiles.push_back(std::move(cssPath));
}
loaded = true; loaded = true;
Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount); Serial.printf("[%lu] [BMC] Loaded cache data: %d spine, %d TOC entries\n", millis(), spineCount, tocCount);

View File

@ -14,6 +14,7 @@ class BookMetadataCache {
std::string language; std::string language;
std::string coverItemHref; std::string coverItemHref;
std::string textReferenceHref; std::string textReferenceHref;
std::vector<std::string> cssFiles;
}; };
struct SpineEntry { struct SpineEntry {

View File

@ -19,6 +19,23 @@ namespace {
constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD"; constexpr char SOFT_HYPHEN_UTF8[] = "\xC2\xAD";
constexpr size_t SOFT_HYPHEN_BYTES = 2; constexpr size_t SOFT_HYPHEN_BYTES = 2;
// Returns true when `c` is a punctuation mark that is laid out flush against the
// preceding word (no space before it): sentence punctuation . , ! ? ; : and the
// ASCII quote characters " and '.
// Brackets and parentheses are deliberately left out so that decorative patterns
// such as "[ 1 ]" keep their spacing.
bool isAttachingPunctuation(const char c) {
  switch (c) {
    case '.':
    case ',':
    case '!':
    case '?':
    case ';':
    case ':':
    case '"':
    case '\'':
      return true;
    default:
      return false;
  }
}
// Returns true when `word` should be attached to the previous word without a
// separating space.
// This is a heuristic, not a full scan: a word qualifies when it is non-empty, at
// most three bytes long, and its first character is attaching punctuation.
// Characters after the first are not inspected; the length cap is what keeps
// false positives down.
bool isAttachingPunctuationWord(const std::string& word) {
  constexpr size_t MAX_ATTACHING_BYTES = 3;
  if (word.empty() || word.size() > MAX_ATTACHING_BYTES) {
    return false;
  }
  return isAttachingPunctuation(word.front());
}
bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; } bool containsSoftHyphen(const std::string& word) { return word.find(SOFT_HYPHEN_UTF8) != std::string::npos; }
// Removes every soft hyphen in-place so rendered glyphs match measured widths. // Removes every soft hyphen in-place so rendered glyphs match measured widths.
@ -49,11 +66,12 @@ uint16_t measureWordWidth(const GfxRenderer& renderer, const int fontId, const s
} // namespace } // namespace
void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) { void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline) {
if (word.empty()) return; if (word.empty()) return;
words.push_back(std::move(word)); words.push_back(std::move(word));
wordStyles.push_back(fontStyle); wordStyles.push_back(fontStyle);
wordUnderlines.push_back(underline);
} }
// Consumes data to minimize memory usage // Consumes data to minimize memory usage
@ -94,7 +112,8 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
auto wordStylesIt = wordStyles.begin(); auto wordStylesIt = wordStyles.begin();
while (wordsIt != words.end()) { while (wordsIt != words.end()) {
wordWidths.push_back(measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt)); uint16_t width = measureWordWidth(renderer, fontId, *wordsIt, *wordStylesIt);
wordWidths.push_back(width);
std::advance(wordsIt, 1); std::advance(wordsIt, 1);
std::advance(wordStylesIt, 1); std::advance(wordStylesIt, 1);
@ -109,10 +128,18 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
return {}; return {};
} }
// Calculate first line indent (only for left/justified text without extra paragraph spacing)
const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing &&
(style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN)
? blockStyle.textIndent
: 0;
// Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation. // Ensure any word that would overflow even as the first entry on a line is split using fallback hyphenation.
for (size_t i = 0; i < wordWidths.size(); ++i) { for (size_t i = 0; i < wordWidths.size(); ++i) {
while (wordWidths[i] > pageWidth) { // First word needs to fit in reduced width if there's an indent
if (!hyphenateWordAtIndex(i, pageWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) { const int effectiveWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
while (wordWidths[i] > effectiveWidth) {
if (!hyphenateWordAtIndex(i, effectiveWidth, renderer, fontId, wordWidths, /*allowFallbackBreaks=*/true)) {
break; break;
} }
} }
@ -133,11 +160,14 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
int currlen = -spaceWidth; int currlen = -spaceWidth;
dp[i] = MAX_COST; dp[i] = MAX_COST;
// First line has reduced width due to text-indent
const int effectivePageWidth = i == 0 ? pageWidth - firstLineIndent : pageWidth;
for (size_t j = i; j < totalWordCount; ++j) { for (size_t j = i; j < totalWordCount; ++j) {
// Current line length: previous width + space + current word width // Current line length: previous width + space + current word width
currlen += wordWidths[j] + spaceWidth; currlen += wordWidths[j] + spaceWidth;
if (currlen > pageWidth) { if (currlen > effectivePageWidth) {
break; break;
} }
@ -145,7 +175,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
if (j == totalWordCount - 1) { if (j == totalWordCount - 1) {
cost = 0; // Last line cost = 0; // Last line
} else { } else {
const int remainingSpace = pageWidth - currlen; const int remainingSpace = effectivePageWidth - currlen;
// Use long long for the square to prevent overflow // Use long long for the square to prevent overflow
const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1]; const long long cost_ll = static_cast<long long>(remainingSpace) * remainingSpace + dp[j + 1];
@ -200,7 +230,11 @@ void ParsedText::applyParagraphIndent() {
return; return;
} }
if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) { if (blockStyle.textIndentDefined) {
// CSS text-indent is explicitly set (even if 0) - don't use fallback EmSpace
// The actual indent positioning is handled in extractLine()
} else if (style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN) {
// No CSS text-indent defined - use EmSpace fallback for visual indent
words.front().insert(0, "\xe2\x80\x83"); words.front().insert(0, "\xe2\x80\x83");
} }
} }
@ -209,13 +243,23 @@ void ParsedText::applyParagraphIndent() {
std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId, std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& renderer, const int fontId,
const int pageWidth, const int spaceWidth, const int pageWidth, const int spaceWidth,
std::vector<uint16_t>& wordWidths) { std::vector<uint16_t>& wordWidths) {
// Calculate first line indent (only for left/justified text without extra paragraph spacing)
const int firstLineIndent = blockStyle.textIndent > 0 && !extraParagraphSpacing &&
(style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN)
? blockStyle.textIndent
: 0;
std::vector<size_t> lineBreakIndices; std::vector<size_t> lineBreakIndices;
size_t currentIndex = 0; size_t currentIndex = 0;
bool isFirstLine = true;
while (currentIndex < wordWidths.size()) { while (currentIndex < wordWidths.size()) {
const size_t lineStart = currentIndex; const size_t lineStart = currentIndex;
int lineWidth = 0; int lineWidth = 0;
// First line has reduced width due to text-indent
const int effectivePageWidth = isFirstLine ? pageWidth - firstLineIndent : pageWidth;
// Consume as many words as possible for current line, splitting when prefixes fit // Consume as many words as possible for current line, splitting when prefixes fit
while (currentIndex < wordWidths.size()) { while (currentIndex < wordWidths.size()) {
const bool isFirstWord = currentIndex == lineStart; const bool isFirstWord = currentIndex == lineStart;
@ -223,14 +267,14 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
const int candidateWidth = spacing + wordWidths[currentIndex]; const int candidateWidth = spacing + wordWidths[currentIndex];
// Word fits on current line // Word fits on current line
if (lineWidth + candidateWidth <= pageWidth) { if (lineWidth + candidateWidth <= effectivePageWidth) {
lineWidth += candidateWidth; lineWidth += candidateWidth;
++currentIndex; ++currentIndex;
continue; continue;
} }
// Word would overflow — try to split based on hyphenation points // Word would overflow — try to split based on hyphenation points
const int availableWidth = pageWidth - lineWidth - spacing; const int availableWidth = effectivePageWidth - lineWidth - spacing;
const bool allowFallbackBreaks = isFirstWord; // Only for first word on line const bool allowFallbackBreaks = isFirstWord; // Only for first word on line
if (availableWidth > 0 && if (availableWidth > 0 &&
@ -250,6 +294,7 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
} }
lineBreakIndices.push_back(currentIndex); lineBreakIndices.push_back(currentIndex);
isFirstLine = false;
} }
return lineBreakIndices; return lineBreakIndices;
@ -334,49 +379,84 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0; const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
const size_t lineWordCount = lineBreak - lastBreakAt; const size_t lineWordCount = lineBreak - lastBreakAt;
// Calculate total word width for this line // Calculate first line indent (only for left/justified text without extra paragraph spacing)
const bool isFirstLine = breakIndex == 0;
const int firstLineIndent = isFirstLine && blockStyle.textIndent > 0 && !extraParagraphSpacing &&
(style == TextBlock::JUSTIFIED || style == TextBlock::LEFT_ALIGN)
? blockStyle.textIndent
: 0;
// Calculate total word width for this line and count actual word gaps
// (punctuation that attaches to previous word doesn't count as a gap)
// Note: words list starts at the beginning because previous lines were spliced out
int lineWordWidthSum = 0; int lineWordWidthSum = 0;
for (size_t i = lastBreakAt; i < lineBreak; i++) { size_t actualGapCount = 0;
lineWordWidthSum += wordWidths[i]; auto countWordIt = words.begin();
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
lineWordWidthSum += wordWidths[lastBreakAt + wordIdx];
// Count gaps: each word after the first creates a gap, unless it's attaching punctuation
if (wordIdx > 0 && !isAttachingPunctuationWord(*countWordIt)) {
actualGapCount++;
}
++countWordIt;
} }
// Calculate spacing // Calculate spacing (account for indent reducing effective page width on first line)
const int spareSpace = pageWidth - lineWordWidthSum; const int effectivePageWidth = pageWidth - firstLineIndent;
const int spareSpace = effectivePageWidth - lineWordWidthSum;
int spacing = spaceWidth; int spacing = spaceWidth;
const bool isLastLine = breakIndex == lineBreakIndices.size() - 1; const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;
if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) { // For justified text, calculate spacing based on actual gap count
spacing = spareSpace / (lineWordCount - 1); if (style == TextBlock::JUSTIFIED && !isLastLine && actualGapCount >= 1) {
spacing = spareSpace / static_cast<int>(actualGapCount);
} }
// Calculate initial x position // Calculate initial x position (first line starts at indent for left/justified text)
uint16_t xpos = 0; auto xpos = static_cast<uint16_t>(firstLineIndent);
if (style == TextBlock::RIGHT_ALIGN) { if (style == TextBlock::RIGHT_ALIGN) {
xpos = spareSpace - (lineWordCount - 1) * spaceWidth; xpos = spareSpace - static_cast<int>(actualGapCount) * spaceWidth;
} else if (style == TextBlock::CENTER_ALIGN) { } else if (style == TextBlock::CENTER_ALIGN) {
xpos = (spareSpace - (lineWordCount - 1) * spaceWidth) / 2; xpos = (spareSpace - static_cast<int>(actualGapCount) * spaceWidth) / 2;
} }
// Pre-calculate X positions for words // Pre-calculate X positions for words
// Punctuation that attaches to the previous word doesn't get space before it
// Note: words list starts at the beginning because previous lines were spliced out
std::list<uint16_t> lineXPos; std::list<uint16_t> lineXPos;
for (size_t i = lastBreakAt; i < lineBreak; i++) { auto wordIt = words.begin();
const uint16_t currentWordWidth = wordWidths[i];
for (size_t wordIdx = 0; wordIdx < lineWordCount; wordIdx++) {
const uint16_t currentWordWidth = wordWidths[lastBreakAt + wordIdx];
lineXPos.push_back(xpos); lineXPos.push_back(xpos);
xpos += currentWordWidth + spacing;
// Add spacing after this word, unless the next word is attaching punctuation
auto nextWordIt = wordIt;
++nextWordIt;
const bool nextIsAttachingPunctuation = wordIdx + 1 < lineWordCount && isAttachingPunctuationWord(*nextWordIt);
xpos += currentWordWidth + (nextIsAttachingPunctuation ? 0 : spacing);
++wordIt;
} }
// Iterators always start at the beginning as we are moving content with splice below // Iterators always start at the beginning as we are moving content with splice below
auto wordEndIt = words.begin(); auto wordEndIt = words.begin();
auto wordStyleEndIt = wordStyles.begin(); auto wordStyleEndIt = wordStyles.begin();
auto wordUnderlineEndIt = wordUnderlines.begin();
std::advance(wordEndIt, lineWordCount); std::advance(wordEndIt, lineWordCount);
std::advance(wordStyleEndIt, lineWordCount); std::advance(wordStyleEndIt, lineWordCount);
std::advance(wordUnderlineEndIt, lineWordCount);
// *** CRITICAL STEP: CONSUME DATA USING SPLICE *** // *** CRITICAL STEP: CONSUME DATA USING SPLICE ***
std::list<std::string> lineWords; std::list<std::string> lineWords;
lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt); lineWords.splice(lineWords.begin(), words, words.begin(), wordEndIt);
std::list<EpdFontFamily::Style> lineWordStyles; std::list<EpdFontFamily::Style> lineWordStyles;
lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt); lineWordStyles.splice(lineWordStyles.begin(), wordStyles, wordStyles.begin(), wordStyleEndIt);
std::list<bool> lineWordUnderlines;
lineWordUnderlines.splice(lineWordUnderlines.begin(), wordUnderlines, wordUnderlines.begin(), wordUnderlineEndIt);
for (auto& word : lineWords) { for (auto& word : lineWords) {
if (containsSoftHyphen(word)) { if (containsSoftHyphen(word)) {
@ -384,5 +464,6 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
} }
} }
processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style)); processLine(std::make_shared<TextBlock>(std::move(lineWords), std::move(lineXPos), std::move(lineWordStyles), style,
} blockStyle, std::move(lineWordUnderlines)));
}

View File

@ -8,6 +8,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "blocks/BlockStyle.h"
#include "blocks/TextBlock.h" #include "blocks/TextBlock.h"
class GfxRenderer; class GfxRenderer;
@ -15,7 +16,9 @@ class GfxRenderer;
class ParsedText { class ParsedText {
std::list<std::string> words; std::list<std::string> words;
std::list<EpdFontFamily::Style> wordStyles; std::list<EpdFontFamily::Style> wordStyles;
std::list<bool> wordUnderlines; // Track underline per word
TextBlock::Style style; TextBlock::Style style;
BlockStyle blockStyle;
bool extraParagraphSpacing; bool extraParagraphSpacing;
bool hyphenationEnabled; bool hyphenationEnabled;
@ -33,13 +36,18 @@ class ParsedText {
public: public:
explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing, explicit ParsedText(const TextBlock::Style style, const bool extraParagraphSpacing,
const bool hyphenationEnabled = false) const bool hyphenationEnabled = false, const BlockStyle& blockStyle = BlockStyle())
: style(style), extraParagraphSpacing(extraParagraphSpacing), hyphenationEnabled(hyphenationEnabled) {} : style(style),
blockStyle(blockStyle),
extraParagraphSpacing(extraParagraphSpacing),
hyphenationEnabled(hyphenationEnabled) {}
~ParsedText() = default; ~ParsedText() = default;
void addWord(std::string word, EpdFontFamily::Style fontStyle); void addWord(std::string word, EpdFontFamily::Style fontStyle, bool underline = false);
void setStyle(const TextBlock::Style style) { this->style = style; } void setStyle(const TextBlock::Style style) { this->style = style; }
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
TextBlock::Style getStyle() const { return style; } TextBlock::Style getStyle() const { return style; }
const BlockStyle& getBlockStyle() const { return blockStyle; }
size_t size() const { return words.size(); } size_t size() const { return words.size(); }
bool isEmpty() const { return words.empty(); } bool isEmpty() const { return words.empty(); }
void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth, void layoutAndExtractLines(const GfxRenderer& renderer, int fontId, uint16_t viewportWidth,

View File

@ -8,7 +8,7 @@
#include "parsers/ChapterHtmlSlimParser.h" #include "parsers/ChapterHtmlSlimParser.h"
namespace { namespace {
constexpr uint8_t SECTION_FILE_VERSION = 10; constexpr uint8_t SECTION_FILE_VERSION = 11;
constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) + constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(bool) +
sizeof(uint32_t); sizeof(uint32_t);
@ -186,8 +186,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
ChapterHtmlSlimParser visitor( ChapterHtmlSlimParser visitor(
tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth, tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, paragraphAlignment, viewportWidth,
viewportHeight, hyphenationEnabled, viewportHeight, hyphenationEnabled,
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, [this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); }, progressFn,
progressFn); epub->getCssParser());
Hyphenator::setPreferredLanguage(epub->getLanguage()); Hyphenator::setPreferredLanguage(epub->getLanguage());
success = visitor.parseAndBuildPages(); success = visitor.parseAndBuildPages();

View File

@ -0,0 +1,27 @@
#pragma once
#include <cstdint>
/**
 * BlockStyle - Block-level CSS properties for paragraphs
 *
 * Holds per-side margin and padding plus the text indent, all in pixels.
 * Padding is treated similarly to margins for rendering purposes, so the inset
 * helpers below simply add the two together.
 */
struct BlockStyle {
  int16_t marginTop = 0;      // pixels
  int16_t marginBottom = 0;   // pixels
  int16_t marginLeft = 0;     // pixels
  int16_t marginRight = 0;    // pixels
  int16_t paddingTop = 0;     // pixels (treated same as margin)
  int16_t paddingBottom = 0;  // pixels (treated same as margin)
  int16_t paddingLeft = 0;    // pixels (treated same as margin)
  int16_t paddingRight = 0;   // pixels (treated same as margin)
  int16_t textIndent = 0;     // pixels
  // true if text-indent was explicitly set in CSS; lets a defined value of 0 be
  // told apart from "not specified"
  bool textIndentDefined = false;

  // Combined horizontal insets (margin + padding).
  // int16_t operands are promoted to int for the addition; the casts make the
  // narrowing back to int16_t explicit instead of relying on an implicit conversion.

  /** @return marginLeft + paddingLeft, in pixels. */
  [[nodiscard]] constexpr int16_t leftInset() const noexcept {
    return static_cast<int16_t>(marginLeft + paddingLeft);
  }

  /** @return marginRight + paddingRight, in pixels. */
  [[nodiscard]] constexpr int16_t rightInset() const noexcept {
    return static_cast<int16_t>(marginRight + paddingRight);
  }

  /** @return leftInset() + rightInset(), in pixels. */
  [[nodiscard]] constexpr int16_t totalHorizontalInset() const noexcept {
    return static_cast<int16_t>(leftInset() + rightInset());
  }
};

View File

@ -14,13 +14,40 @@ void TextBlock::render(const GfxRenderer& renderer, const int fontId, const int
auto wordIt = words.begin(); auto wordIt = words.begin();
auto wordStylesIt = wordStyles.begin(); auto wordStylesIt = wordStyles.begin();
auto wordXposIt = wordXpos.begin(); auto wordXposIt = wordXpos.begin();
auto wordUnderlineIt = wordUnderlines.begin();
for (size_t i = 0; i < words.size(); i++) { for (size_t i = 0; i < words.size(); i++) {
renderer.drawText(fontId, *wordXposIt + x, y, wordIt->c_str(), true, *wordStylesIt); const int wordX = *wordXposIt + x;
renderer.drawText(fontId, wordX, y, wordIt->c_str(), true, *wordStylesIt);
// Draw underline if word is underlined
if (wordUnderlineIt != wordUnderlines.end() && *wordUnderlineIt) {
const std::string& w = *wordIt;
const int fullWordWidth = renderer.getTextWidth(fontId, w.c_str(), *wordStylesIt);
// y is the top of the text line; add ascender to reach baseline, then offset 2px below
const int underlineY = y + renderer.getFontAscenderSize(fontId) + 2;
int startX = wordX;
int underlineWidth = fullWordWidth;
// if word starts with em-space ("\xe2\x80\x83"), account for the additional indent before drawing the line
if (w.size() >= 3 && static_cast<uint8_t>(w[0]) == 0xE2 && static_cast<uint8_t>(w[1]) == 0x80 &&
static_cast<uint8_t>(w[2]) == 0x83) {
const char* visiblePtr = w.c_str() + 3;
const int prefixWidth = renderer.getIndentWidth(fontId, std::string("\xe2\x80\x83").c_str());
const int visibleWidth = renderer.getTextWidth(fontId, visiblePtr, *wordStylesIt);
startX = wordX + prefixWidth;
underlineWidth = visibleWidth;
}
renderer.drawLine(startX, underlineY, startX + underlineWidth, underlineY, true);
}
std::advance(wordIt, 1); std::advance(wordIt, 1);
std::advance(wordStylesIt, 1); std::advance(wordStylesIt, 1);
std::advance(wordXposIt, 1); std::advance(wordXposIt, 1);
if (wordUnderlineIt != wordUnderlines.end()) {
std::advance(wordUnderlineIt, 1);
}
} }
} }
@ -37,9 +64,40 @@ bool TextBlock::serialize(FsFile& file) const {
for (auto x : wordXpos) serialization::writePod(file, x); for (auto x : wordXpos) serialization::writePod(file, x);
for (auto s : wordStyles) serialization::writePod(file, s); for (auto s : wordStyles) serialization::writePod(file, s);
// Block style // Underline flags (packed as bytes, 8 words per byte)
uint8_t underlineByte = 0;
int bitIndex = 0;
auto underlineIt = wordUnderlines.begin();
for (size_t i = 0; i < words.size(); i++) {
if (underlineIt != wordUnderlines.end() && *underlineIt) {
underlineByte |= 1 << bitIndex;
}
bitIndex++;
if (bitIndex == 8 || i == words.size() - 1) {
serialization::writePod(file, underlineByte);
underlineByte = 0;
bitIndex = 0;
}
if (underlineIt != wordUnderlines.end()) {
++underlineIt;
}
}
// Block style (alignment)
serialization::writePod(file, style); serialization::writePod(file, style);
// Block style (margins/padding/indent)
serialization::writePod(file, blockStyle.marginTop);
serialization::writePod(file, blockStyle.marginBottom);
serialization::writePod(file, blockStyle.marginLeft);
serialization::writePod(file, blockStyle.marginRight);
serialization::writePod(file, blockStyle.paddingTop);
serialization::writePod(file, blockStyle.paddingBottom);
serialization::writePod(file, blockStyle.paddingLeft);
serialization::writePod(file, blockStyle.paddingRight);
serialization::writePod(file, blockStyle.textIndent);
serialization::writePod(file, blockStyle.textIndentDefined);
return true; return true;
} }
@ -48,7 +106,9 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
std::list<std::string> words; std::list<std::string> words;
std::list<uint16_t> wordXpos; std::list<uint16_t> wordXpos;
std::list<EpdFontFamily::Style> wordStyles; std::list<EpdFontFamily::Style> wordStyles;
std::list<bool> wordUnderlines;
Style style; Style style;
BlockStyle blockStyle;
// Word count // Word count
serialization::readPod(file, wc); serialization::readPod(file, wc);
@ -67,8 +127,34 @@ std::unique_ptr<TextBlock> TextBlock::deserialize(FsFile& file) {
for (auto& x : wordXpos) serialization::readPod(file, x); for (auto& x : wordXpos) serialization::readPod(file, x);
for (auto& s : wordStyles) serialization::readPod(file, s); for (auto& s : wordStyles) serialization::readPod(file, s);
// Block style // Underline flags (packed as bytes, 8 words per byte)
wordUnderlines.resize(wc, false);
auto underlineIt = wordUnderlines.begin();
const int bytesNeeded = (wc + 7) / 8;
for (int byteIdx = 0; byteIdx < bytesNeeded; byteIdx++) {
uint8_t underlineByte;
serialization::readPod(file, underlineByte);
for (int bit = 0; bit < 8 && underlineIt != wordUnderlines.end(); bit++) {
*underlineIt = (underlineByte & 1 << bit) != 0;
++underlineIt;
}
}
// Block style (alignment)
serialization::readPod(file, style); serialization::readPod(file, style);
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style)); // Block style (margins/padding/indent)
serialization::readPod(file, blockStyle.marginTop);
serialization::readPod(file, blockStyle.marginBottom);
serialization::readPod(file, blockStyle.marginLeft);
serialization::readPod(file, blockStyle.marginRight);
serialization::readPod(file, blockStyle.paddingTop);
serialization::readPod(file, blockStyle.paddingBottom);
serialization::readPod(file, blockStyle.paddingLeft);
serialization::readPod(file, blockStyle.paddingRight);
serialization::readPod(file, blockStyle.textIndent);
serialization::readPod(file, blockStyle.textIndentDefined);
return std::unique_ptr<TextBlock>(new TextBlock(std::move(words), std::move(wordXpos), std::move(wordStyles), style,
blockStyle, std::move(wordUnderlines)));
} }

View File

@ -7,6 +7,7 @@
#include <string> #include <string>
#include "Block.h" #include "Block.h"
#include "BlockStyle.h"
// Represents a line of text on a page // Represents a line of text on a page
class TextBlock final : public Block { class TextBlock final : public Block {
@ -22,15 +23,30 @@ class TextBlock final : public Block {
std::list<std::string> words; std::list<std::string> words;
std::list<uint16_t> wordXpos; std::list<uint16_t> wordXpos;
std::list<EpdFontFamily::Style> wordStyles; std::list<EpdFontFamily::Style> wordStyles;
std::list<bool> wordUnderlines; // Track underline per word
Style style; Style style;
BlockStyle blockStyle;
public: public:
explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos, explicit TextBlock(std::list<std::string> words, std::list<uint16_t> word_xpos,
std::list<EpdFontFamily::Style> word_styles, const Style style) std::list<EpdFontFamily::Style> word_styles, const Style style,
: words(std::move(words)), wordXpos(std::move(word_xpos)), wordStyles(std::move(word_styles)), style(style) {} const BlockStyle& blockStyle = BlockStyle(), std::list<bool> word_underlines = std::list<bool>())
: words(std::move(words)),
wordXpos(std::move(word_xpos)),
wordStyles(std::move(word_styles)),
wordUnderlines(std::move(word_underlines)),
style(style),
blockStyle(blockStyle) {
// Ensure underlines list matches words list size
while (this->wordUnderlines.size() < this->words.size()) {
this->wordUnderlines.push_back(false);
}
}
~TextBlock() override = default; ~TextBlock() override = default;
void setStyle(const Style style) { this->style = style; } void setStyle(const Style style) { this->style = style; }
void setBlockStyle(const BlockStyle& blockStyle) { this->blockStyle = blockStyle; }
Style getStyle() const { return style; } Style getStyle() const { return style; }
const BlockStyle& getBlockStyle() const { return blockStyle; }
bool isEmpty() override { return words.empty(); } bool isEmpty() override { return words.empty(); }
void layout(GfxRenderer& renderer) override {}; void layout(GfxRenderer& renderer) override {};
// given a renderer works out where to break the words into lines // given a renderer works out where to break the words into lines

View File

@ -0,0 +1,527 @@
#include "CssParser.h"
#include <HardwareSerial.h>
#include <algorithm>
#include <cctype>
namespace {
// Size in bytes of the temporary buffer filled by each read call while loading a CSS file
constexpr size_t READ_BUFFER_SIZE = 512;
// Upper bound in bytes on how much of a CSS file is loaded into memory (64 KiB);
// reading stops once this much content has been collected, to prevent memory issues
constexpr size_t MAX_CSS_SIZE = 64 * 1024;
// True for the characters treated as CSS whitespace here: space, tab, LF, CR and form feed
bool isCssWhitespace(const char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
      return true;
    default:
      return false;
  }
}
// Read a file into a string, collecting at most MAX_CSS_SIZE bytes.
// Larger files are truncated at the limit; a failed or empty read ends the loop early,
// so the result may be shorter than the file (or empty).
std::string readFileContent(FsFile& file) {
  std::string content;
  content.reserve(std::min(static_cast<size_t>(file.size()), MAX_CSS_SIZE));

  char buffer[READ_BUFFER_SIZE];
  while (file.available() && content.size() < MAX_CSS_SIZE) {
    // Request no more than the remaining budget: a full-buffer read near the limit would
    // push the string past MAX_CSS_SIZE and beyond the capacity reserved above.
    const size_t wanted = std::min(sizeof(buffer), MAX_CSS_SIZE - content.size());
    const int bytesRead = file.read(buffer, wanted);
    if (bytesRead <= 0) break;
    content.append(buffer, static_cast<size_t>(bytesRead));
  }
  return content;
}
// Return a copy of `css` with every /* ... */ comment removed.
// Text outside comments is copied verbatim (nothing is inserted where a comment was).
// An unterminated comment discards everything from its opening marker to the end.
std::string stripComments(const std::string& css) {
  std::string stripped;
  stripped.reserve(css.size());

  size_t cursor = 0;
  while (cursor < css.size()) {
    const size_t open = css.find("/*", cursor);
    if (open == std::string::npos) {
      // No further comments: keep the remainder as-is
      stripped.append(css, cursor, std::string::npos);
      break;
    }

    // Keep the text leading up to the comment
    stripped.append(css, cursor, open - cursor);

    const size_t close = css.find("*/", open + 2);
    if (close == std::string::npos) {
      // Unterminated comment - drop the rest of the input
      break;
    }
    cursor = close + 2;
  }
  return stripped;
}
// Skip an @-rule (such as @media, @import, @font-face).
//
// `start` must be the index of the '@'. Returns the index just past the rule:
//   - after the terminating ';' for statement rules (e.g. @import ...;)
//   - after the '}' that closes the rule's outermost block (nested blocks are balanced)
//   - css.size() if the rule is never terminated
size_t skipAtRule(const std::string& css, const size_t start) {
  size_t pos = start + 1;  // Skip the '@'

  // Skip the rule's identifier. The cast matters: passing a negative char (non-ASCII byte on
  // platforms where char is signed) to std::isalnum is undefined behaviour.
  while (pos < css.size() && (std::isalnum(static_cast<unsigned char>(css[pos])) || css[pos] == '-')) {
    ++pos;
  }

  // Scan for whichever terminator comes first: ';' outside any block, or the matching '}'
  int braceDepth = 0;
  for (; pos < css.size(); ++pos) {
    const char c = css[pos];
    if (c == '{') {
      ++braceDepth;
    } else if (c == '}') {
      // A stray '}' before any '{' must not drive the depth negative; otherwise neither
      // terminator could ever match again and the rest of the stylesheet would be skipped.
      if (braceDepth == 0) continue;
      if (--braceDepth == 0) {
        return pos + 1;
      }
    } else if (c == ';' && braceDepth == 0) {
      return pos + 1;
    }
  }
  return css.size();
}
// Extract the next regular rule ("selector { body }") from CSS content.
//
// Scanning starts at `pos`; leading whitespace and @-rules are skipped. On success returns
// true with `selector` holding the raw text before '{', `body` holding the text between the
// braces (nested braces are balanced), and `pos` advanced past the rule. Returns false when
// no further rule exists; `selector` and `body` are then empty.
bool extractNextRule(const std::string& css, size_t& pos, std::string& selector, std::string& body) {
  selector.clear();
  body.clear();

  // Skip whitespace and @-rules until a regular rule starts (iterative, so a long run of
  // @-rules cannot grow the stack)
  while (pos < css.size()) {
    while (pos < css.size() && isCssWhitespace(css[pos])) {
      ++pos;
    }
    if (pos >= css.size()) return false;
    if (css[pos] != '@') break;  // Found the start of a regular rule
    pos = skipAtRule(css, pos);
  }
  if (pos >= css.size()) return false;

  // Selector is everything up to the opening brace
  const size_t bracePos = css.find('{', pos);
  if (bracePos == std::string::npos) return false;
  selector = css.substr(pos, bracePos - pos);

  // Walk to the matching closing brace
  int depth = 1;
  const size_t bodyStart = bracePos + 1;
  size_t bodyEnd = bodyStart;
  while (bodyEnd < css.size() && depth > 0) {
    if (css[bodyEnd] == '{') {
      ++depth;
    } else if (css[bodyEnd] == '}') {
      --depth;
    }
    ++bodyEnd;
  }

  // If the block was closed, bodyEnd is one past the '}' and that brace is excluded.
  // If input ended first (depth > 0) there is no brace to exclude, so keep everything up to
  // the end instead of chopping the final character off the last declaration.
  const size_t bodyLength = depth == 0 ? bodyEnd - bodyStart - 1 : bodyEnd - bodyStart;
  body = css.substr(bodyStart, bodyLength);

  pos = bodyEnd;
  return true;
}
} // anonymous namespace
// String utilities implementation

// Canonical form of a CSS fragment: lower-cased, leading/trailing whitespace removed, and
// every internal whitespace run collapsed to a single space.
std::string CssParser::normalized(const std::string& s) {
  std::string out;
  out.reserve(s.size());

  // A separator is only emitted once the next visible character arrives, so leading and
  // trailing whitespace never reach the output.
  bool pendingSpace = false;
  for (const char ch : s) {
    if (isCssWhitespace(ch)) {
      pendingSpace = !out.empty();
      continue;
    }
    if (pendingSpace) {
      out.push_back(' ');
      pendingSpace = false;
    }
    out.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
  }
  return out;
}
// Split `s` at every `delimiter`, normalizing each piece (see normalized()) and dropping
// pieces that end up empty.
std::vector<std::string> CssParser::splitOnChar(const std::string& s, const char delimiter) {
  std::vector<std::string> pieces;

  size_t begin = 0;
  while (true) {
    const size_t end = s.find(delimiter, begin);
    const bool isLast = end == std::string::npos;

    const std::string piece = normalized(s.substr(begin, isLast ? std::string::npos : end - begin));
    if (!piece.empty()) {
      pieces.push_back(piece);
    }

    if (isLast) break;
    begin = end + 1;
  }
  return pieces;
}
// Split `s` into its whitespace-separated tokens. Tokens are returned unmodified (no case
// folding); runs of whitespace never produce empty tokens.
std::vector<std::string> CssParser::splitWhitespace(const std::string& s) {
  std::vector<std::string> tokens;

  const size_t length = s.size();
  size_t i = 0;
  while (i < length) {
    // Skip the gap before the next token
    while (i < length && isCssWhitespace(s[i])) {
      ++i;
    }
    if (i == length) break;

    // Consume the token itself
    const size_t tokenStart = i;
    while (i < length && !isCssWhitespace(s[i])) {
      ++i;
    }
    tokens.push_back(s.substr(tokenStart, i - tokenStart));
  }
  return tokens;
}
// Property value interpreters

// Map a text-align value to TextAlign. "start"/"end" are treated as left/right.
// Anything unrecognised yields TextAlign::None.
TextAlign CssParser::interpretAlignment(const std::string& val) {
  const std::string keyword = normalized(val);

  TextAlign align = TextAlign::None;
  if (keyword == "left" || keyword == "start") {
    align = TextAlign::Left;
  } else if (keyword == "right" || keyword == "end") {
    align = TextAlign::Right;
  } else if (keyword == "center") {
    align = TextAlign::Center;
  } else if (keyword == "justify") {
    align = TextAlign::Justify;
  }
  return align;
}
// Map a font-style value to CssFontStyle: "italic" and "oblique" are italic, everything
// else is normal.
CssFontStyle CssParser::interpretFontStyle(const std::string& val) {
  const std::string keyword = normalized(val);
  const bool slanted = keyword == "italic" || keyword == "oblique";
  return slanted ? CssFontStyle::Italic : CssFontStyle::Normal;
}
// Map a font-weight value to CssFontWeight.
//   - "bold" / "bolder"            -> Bold
//   - a number >= 700              -> Bold
//   - everything else              -> Normal ("normal", "lighter", smaller numbers such as
//                                     500/600, and unrecognised values)
CssFontWeight CssParser::interpretFontWeight(const std::string& val) {
  const std::string weight = normalized(val);

  if (weight == "bold" || weight == "bolder") {
    return CssFontWeight::Bold;
  }

  // Only a value that is a number from start to finish counts as numeric; keywords such as
  // "normal" parse no digits and fall through to Normal.
  char* stop = nullptr;
  const long number = std::strtol(weight.c_str(), &stop, 10);
  const bool isWholeNumber = stop != weight.c_str() && *stop == '\0';

  return (isWholeNumber && number >= 700) ? CssFontWeight::Bold : CssFontWeight::Normal;
}
// Map a text-decoration value to CssTextDecoration. The value may list several keywords,
// so this reports Underline whenever "underline" occurs anywhere in it, and None otherwise.
CssTextDecoration CssParser::interpretDecoration(const std::string& val) {
  const std::string decl = normalized(val);
  const bool underlined = decl.find("underline") != std::string::npos;
  return underlined ? CssTextDecoration::Underline : CssTextDecoration::None;
}
// Parse a single CSS length such as "1.5em", "12px", "10pt" or "0".
//
// The unit is kept (not resolved to pixels) so the caller can resolve it later against the
// font size. "em", "rem" and "pt" are recognised; "px", unitless numbers and any other unit
// are reported as Pixels. Returns a default CssLength (0 px) when no number can be parsed
// (e.g. "auto", "inherit").
CssLength CssParser::interpretLength(const std::string& val) {
  const std::string v = normalized(val);
  if (v.empty()) return CssLength{};

  // Find where the number ends. The unsigned char cast is required: std::isdigit has
  // undefined behaviour for negative char values (non-ASCII bytes where char is signed).
  size_t unitStart = 0;
  while (unitStart < v.size()) {
    const char c = v[unitStart];
    const bool numeric = std::isdigit(static_cast<unsigned char>(c)) || c == '.' || c == '-' || c == '+';
    if (!numeric) break;
    ++unitStart;
  }
  const std::string numPart = v.substr(0, unitStart);

  // The unit is only the token attached to the number. Anything after the first space
  // (e.g. "!important") must not be folded into it, or "1em !important" would fail the unit
  // comparison below and silently become 1 pixel.
  const size_t unitEnd = v.find(' ', unitStart);
  const std::string unitPart =
      v.substr(unitStart, unitEnd == std::string::npos ? std::string::npos : unitEnd - unitStart);

  // Parse numeric value
  char* endPtr = nullptr;
  const float numericValue = std::strtof(numPart.c_str(), &endPtr);
  if (endPtr == numPart.c_str()) return CssLength{};  // No number parsed

  // Determine unit type (preserved for deferred resolution); px and unitless stay Pixels
  auto unit = CssUnit::Pixels;
  if (unitPart == "em") {
    unit = CssUnit::Em;
  } else if (unitPart == "rem") {
    unit = CssUnit::Rem;
  } else if (unitPart == "pt") {
    unit = CssUnit::Points;
  }

  return CssLength{numericValue, unit};
}
// Convert a CSS spacing value into a whole number of text lines (discrete units for e-ink).
//
//   - em / rem : 1em = 1 line
//   - %        : based on ~30 lines per page, so 10% = 3 lines
//   - other    : unsupported, yields 0
// The result is truncated towards zero and clamped to the range 0-2.
int8_t CssParser::interpretSpacing(const std::string& val) {
  const std::string v = normalized(val);
  if (v.empty()) return 0;

  // Find where the number ends. The unsigned char cast is required: std::isdigit has
  // undefined behaviour for negative char values (non-ASCII bytes where char is signed).
  size_t unitStart = 0;
  while (unitStart < v.size()) {
    const char c = v[unitStart];
    const bool numeric = std::isdigit(static_cast<unsigned char>(c)) || c == '.' || c == '-' || c == '+';
    if (!numeric) break;
    ++unitStart;
  }
  const std::string numPart = v.substr(0, unitStart);

  // Only the token attached to the number is the unit; ignore anything after a space
  // (e.g. "!important") so "1em !important" is still recognised as em.
  const size_t unitEnd = v.find(' ', unitStart);
  const std::string unitPart =
      v.substr(unitStart, unitEnd == std::string::npos ? std::string::npos : unitEnd - unitStart);

  float multiplier = 0.0f;
  if (unitPart == "em" || unitPart == "rem") {
    multiplier = 1.0f;  // 1em = 1 line
  } else if (unitPart == "%") {
    multiplier = 0.3f;  // ~30 lines per page, so 10% = 3 lines
  } else {
    return 0;  // Unsupported unit for spacing
  }

  char* endPtr = nullptr;
  const float numericValue = std::strtof(numPart.c_str(), &endPtr);
  if (endPtr == numPart.c_str()) return 0;

  // Clamp while still in floating point: converting an out-of-range float to an integer is
  // undefined behaviour, so huge inputs must be bounded before the cast.
  const float lines = numericValue * multiplier;
  if (!(lines > 0.0f)) return 0;  // negative, zero or NaN
  if (lines >= 2.0f) return 2;
  return static_cast<int8_t>(lines);  // truncates to 0 or 1
}
// Declaration parsing

// Parse a declaration list ("prop: value; prop: value; ...") into a CssStyle.
//
// Recognised properties: text-align, font-style, font-weight, text-decoration(-line),
// text-indent, margin(-top/-right/-bottom/-left) and padding(-top/-right/-bottom/-left),
// including the 1-4 value margin/padding shorthands. Unknown properties are ignored.
// Each property that is applied also sets its flag in `style.defined`. A trailing
// "!important" is accepted and discarded (priority is not modelled).
CssStyle CssParser::parseDeclarations(const std::string& declBlock) {
  CssStyle style;

  // Expand a 1-4 value box shorthand (CSS order: top, right, bottom, left).
  // Missing values follow the CSS rules: right <- top, bottom <- top, left <- right.
  // Returns false (leaving the outputs untouched) when the value holds no tokens.
  const auto expandBoxShorthand = [](const std::string& value, CssLength& top, CssLength& right, CssLength& bottom,
                                     CssLength& left) -> bool {
    const auto values = splitWhitespace(value);
    if (values.empty()) return false;

    top = interpretLength(values[0]);
    right = values.size() >= 2 ? interpretLength(values[1]) : top;
    bottom = values.size() >= 3 ? interpretLength(values[2]) : top;
    left = values.size() >= 4 ? interpretLength(values[3]) : right;
    return true;
  };

  // Split declarations by semicolon
  const auto declarations = splitOnChar(declBlock, ';');

  for (const auto& decl : declarations) {
    // Find colon separator
    const size_t colonPos = decl.find(':');
    if (colonPos == std::string::npos || colonPos == 0) continue;

    const std::string propName = normalized(decl.substr(0, colonPos));
    std::string propValue = normalized(decl.substr(colonPos + 1));

    // Drop a trailing "!important" (also "! important"). Left in place it makes keyword
    // values unrecognisable ("center !important"), turns the unit of "1em !important" into
    // garbage, and is counted as an extra component of the margin/padding shorthands.
    const size_t bangPos = propValue.find('!');
    if (bangPos != std::string::npos) {
      propValue.erase(bangPos);
      while (!propValue.empty() && propValue.back() == ' ') {
        propValue.pop_back();
      }
    }

    if (propName.empty() || propValue.empty()) continue;

    // Match property and set value
    if (propName == "text-align") {
      // Unrecognised alignments are skipped rather than recorded as "None"
      const TextAlign align = interpretAlignment(propValue);
      if (align != TextAlign::None) {
        style.alignment = align;
        style.defined.alignment = 1;
      }
    } else if (propName == "font-style") {
      style.fontStyle = interpretFontStyle(propValue);
      style.defined.fontStyle = 1;
    } else if (propName == "font-weight") {
      style.fontWeight = interpretFontWeight(propValue);
      style.defined.fontWeight = 1;
    } else if (propName == "text-decoration" || propName == "text-decoration-line") {
      style.decoration = interpretDecoration(propValue);
      style.defined.decoration = 1;
    } else if (propName == "text-indent") {
      style.indent = interpretLength(propValue);
      style.defined.indent = 1;
    } else if (propName == "margin-top") {
      style.marginTop = interpretLength(propValue);
      style.defined.marginTop = 1;
    } else if (propName == "margin-bottom") {
      style.marginBottom = interpretLength(propValue);
      style.defined.marginBottom = 1;
    } else if (propName == "margin-left") {
      style.marginLeft = interpretLength(propValue);
      style.defined.marginLeft = 1;
    } else if (propName == "margin-right") {
      style.marginRight = interpretLength(propValue);
      style.defined.marginRight = 1;
    } else if (propName == "margin") {
      if (expandBoxShorthand(propValue, style.marginTop, style.marginRight, style.marginBottom, style.marginLeft)) {
        style.defined.marginTop = 1;
        style.defined.marginRight = 1;
        style.defined.marginBottom = 1;
        style.defined.marginLeft = 1;
      }
    } else if (propName == "padding-top") {
      style.paddingTop = interpretLength(propValue);
      style.defined.paddingTop = 1;
    } else if (propName == "padding-bottom") {
      style.paddingBottom = interpretLength(propValue);
      style.defined.paddingBottom = 1;
    } else if (propName == "padding-left") {
      style.paddingLeft = interpretLength(propValue);
      style.defined.paddingLeft = 1;
    } else if (propName == "padding-right") {
      style.paddingRight = interpretLength(propValue);
      style.defined.paddingRight = 1;
    } else if (propName == "padding") {
      if (expandBoxShorthand(propValue, style.paddingTop, style.paddingRight, style.paddingBottom,
                             style.paddingLeft)) {
        style.defined.paddingTop = 1;
        style.defined.paddingRight = 1;
        style.defined.paddingBottom = 1;
        style.defined.paddingLeft = 1;
      }
    }
  }

  return style;
}
// Rule processing
void CssParser::processRuleBlock(const std::string& selectorGroup, const std::string& declarations) {
const CssStyle style = parseDeclarations(declarations);
// Only store if any properties were set
if (!style.defined.anySet()) return;
// Handle comma-separated selectors
const auto selectors = splitOnChar(selectorGroup, ',');
for (const auto& sel : selectors) {
// Normalize the selector
std::string key = normalized(sel);
if (key.empty()) continue;
// Store or merge with existing
auto it = rulesBySelector_.find(key);
if (it != rulesBySelector_.end()) {
it->second.applyOver(style);
} else {
rulesBySelector_[key] = style;
}
}
}
// Main parsing entry point

// Read a stylesheet from `source`, strip its comments and add every rule found to the rule
// table. Returns false only when `source` is not a valid file; an empty file is accepted and
// returns true without logging. The rule count in the log is the table's total size.
bool CssParser::loadFromStream(FsFile& source) {
  if (!source) {
    Serial.printf("[%lu] [CSS] Cannot read from invalid file\n", millis());
    return false;
  }

  const std::string raw = readFileContent(source);
  if (raw.empty()) {
    return true;  // Empty file is valid
  }

  const std::string css = stripComments(raw);

  // Pull rules out one at a time until none remain
  std::string selector;
  std::string body;
  for (size_t cursor = 0; extractNextRule(css, cursor, selector, body);) {
    processRuleBlock(selector, body);
  }

  Serial.printf("[%lu] [CSS] Parsed %zu rules\n", millis(), rulesBySelector_.size());
  return true;
}
// Style resolution

// Build the style for an element from the loaded rules. Matching rules are layered in this
// order, later layers overriding earlier ones:
//   1. tag            (e.g. "p")
//   2. .class         for every class in `classAttr`, in attribute order
//   3. tag.class      for every class in `classAttr`, in attribute order
CssStyle CssParser::resolveStyle(const std::string& tagName, const std::string& classAttr) const {
  CssStyle merged;

  // Layer the rule stored under `selector` (if there is one) on top of what we have so far
  const auto layer = [this, &merged](const std::string& selector) {
    const auto match = rulesBySelector_.find(selector);
    if (match != rulesBySelector_.end()) {
      merged.applyOver(match->second);
    }
  };

  const std::string tag = normalized(tagName);
  layer(tag);

  if (classAttr.empty()) {
    return merged;
  }

  const auto classes = splitWhitespace(classAttr);

  // All plain class rules go first so that every tag.class rule can override them
  for (const auto& cls : classes) {
    layer("." + normalized(cls));
  }
  for (const auto& cls : classes) {
    layer(tag + "." + normalized(cls));
  }

  return merged;
}
// Inline style parsing (static - doesn't need rule database)

// A style="" attribute is a bare declaration list, so it is parsed exactly like the body of
// a rule.
CssStyle CssParser::parseInlineStyle(const std::string& styleValue) {
  const CssStyle inlineStyle = parseDeclarations(styleValue);
  return inlineStyle;
}

View File

@ -0,0 +1,99 @@
#pragma once
#include <SdFat.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "CssStyle.h"
/**
* Lightweight CSS parser for EPUB stylesheets
*
* Parses CSS files and extracts styling information relevant for e-ink display.
* A stylesheet is read into memory, its comments are removed, and each
* "selector { declarations }" rule is stored in a selector -> style table
* that can be queried during HTML parsing.
*
* Supported selectors:
* - Element selectors: p, div, h1, etc.
* - Class selectors: .classname
* - Combined: element.classname
* - Grouped: selector1, selector2 { }
*
* Not supported (silently ignored):
* - Descendant/child selectors
* - Pseudo-classes and pseudo-elements
* - Media queries (content is skipped)
* - @import, @font-face, etc.
*
* Selectors are matched by their normalized text (lower-cased, whitespace collapsed).
*/
class CssParser {
public:
CssParser() = default;
~CssParser() = default;
// Non-copyable
CssParser(const CssParser&) = delete;
CssParser& operator=(const CssParser&) = delete;
/**
* Load and parse CSS from a file stream.
* Can be called multiple times to accumulate rules from multiple stylesheets;
* a selector that is already present has the new declarations applied over its
* existing entry.
* @param source Open file handle to read from
* @return false if the handle is invalid, otherwise true (even if no rules found)
*/
bool loadFromStream(FsFile& source);
/**
* Look up the style for an HTML element, considering tag name and class attributes.
* Applies CSS cascade: element style < class style < element.class style
*
* @param tagName The HTML element name (e.g., "p", "div")
* @param classAttr The class attribute value (may contain multiple space-separated classes)
* @return Combined style with all applicable rules merged; properties that no rule set
*         keep their defaults and stay unflagged in the result's `defined` member
*/
[[nodiscard]] CssStyle resolveStyle(const std::string& tagName, const std::string& classAttr) const;
/**
* Parse an inline style attribute string.
* Static: uses only the declaration parser, not the loaded rules.
* @param styleValue The value of a style="" attribute
* @return Parsed style properties
*/
[[nodiscard]] static CssStyle parseInlineStyle(const std::string& styleValue);
/**
* Check whether no rules have been loaded (true when the rule table is empty)
*/
[[nodiscard]] bool empty() const { return rulesBySelector_.empty(); }
/**
* Get count of loaded rule sets (one per distinct normalized selector)
*/
[[nodiscard]] size_t ruleCount() const { return rulesBySelector_.size(); }
/**
* Clear all loaded rules
*/
void clear() { rulesBySelector_.clear(); }
private:
// Storage: maps normalized selector -> style properties
std::unordered_map<std::string, CssStyle> rulesBySelector_;
// Internal parsing helpers
// processRuleBlock stores one rule under each of its comma-separated selectors;
// parseDeclarations turns a "prop: value; ..." list into a CssStyle.
void processRuleBlock(const std::string& selectorGroup, const std::string& declarations);
static CssStyle parseDeclarations(const std::string& declBlock);
// Individual property value parsers (each takes the raw text of one property value)
static TextAlign interpretAlignment(const std::string& val);
static CssFontStyle interpretFontStyle(const std::string& val);
static CssFontWeight interpretFontWeight(const std::string& val);
static CssTextDecoration interpretDecoration(const std::string& val);
static CssLength interpretLength(const std::string& val);
static int8_t interpretSpacing(const std::string& val);
// String utilities
// normalized: lower-case, trim, collapse whitespace runs to a single space.
// splitOnChar: split on a delimiter, normalizing pieces and dropping empty ones.
// splitWhitespace: split into whitespace-separated tokens, left unmodified.
static std::string normalized(const std::string& s);
static std::vector<std::string> splitOnChar(const std::string& s, char delimiter);
static std::vector<std::string> splitWhitespace(const std::string& s);
};

View File

@ -0,0 +1,199 @@
#pragma once
#include <cstdint>
// Text alignment options matching the CSS text-align property.
// None means no alignment has been specified.
enum class TextAlign : uint8_t {
  None = 0,
  Left = 1,
  Right = 2,
  Center = 3,
  Justify = 4,
};
// CSS length unit types
enum class CssUnit : uint8_t { Pixels = 0, Em = 1, Rem = 2, Points = 3 };

// Represents a CSS length value with its unit, allowing deferred resolution to pixels
struct CssLength {
  float value = 0.0f;
  CssUnit unit = CssUnit::Pixels;

  CssLength() = default;
  CssLength(const float v, const CssUnit u) : value(v), unit(u) {}

  // Convenience constructor for pixel values (most common case)
  explicit CssLength(const float pixels) : value(pixels) {}

  // Resolve to pixels given the current em size (font line height).
  // Em and Rem both scale by emSize; Points use an approximate 1.33 px per pt.
  [[nodiscard]] float toPixels(const float emSize) const {
    switch (unit) {
      case CssUnit::Em:
      case CssUnit::Rem:
        return value * emSize;
      case CssUnit::Points:
        return value * 1.33f;  // Approximate pt to px conversion
      default:
        return value;
    }
  }

  // Resolve to int16_t pixels (for BlockStyle fields), truncating towards zero.
  // The result saturates at the int16_t limits and NaN maps to 0: converting an
  // out-of-range float to an integer type is undefined behaviour, and stylesheet values
  // are not bounded (e.g. "margin-left: 100000px").
  [[nodiscard]] int16_t toPixelsInt16(const float emSize) const {
    const float px = toPixels(emSize);
    if (!(px == px)) return 0;  // NaN compares unequal to itself
    if (px >= static_cast<float>(INT16_MAX)) return INT16_MAX;
    if (px <= static_cast<float>(INT16_MIN)) return INT16_MIN;
    return static_cast<int16_t>(px);
  }
};
// Font style options matching the CSS font-style property
enum class CssFontStyle : uint8_t {
  Normal = 0,
  Italic = 1,
};

// Font weight options - CSS supports 100-900, we simplify to normal/bold
enum class CssFontWeight : uint8_t {
  Normal = 0,
  Bold = 1,
};

// Text decoration options (only underline is distinguished)
enum class CssTextDecoration : uint8_t {
  None = 0,
  Underline = 1,
};
// Bitmask for tracking which properties have been explicitly set.
// One 1-bit flag per supported property, plus 3 reserved bits that are zeroed on
// construction and not touched by anySet() or clearAll().
struct CssPropertyFlags {
  uint16_t alignment : 1;
  uint16_t fontStyle : 1;
  uint16_t fontWeight : 1;
  uint16_t decoration : 1;
  uint16_t indent : 1;
  uint16_t marginTop : 1;
  uint16_t marginBottom : 1;
  uint16_t marginLeft : 1;
  uint16_t marginRight : 1;
  uint16_t paddingTop : 1;
  uint16_t paddingBottom : 1;
  uint16_t paddingLeft : 1;
  uint16_t paddingRight : 1;
  uint16_t reserved : 3;

  // All flags start cleared
  CssPropertyFlags()
      : alignment(0), fontStyle(0), fontWeight(0), decoration(0), indent(0),
        marginTop(0), marginBottom(0), marginLeft(0), marginRight(0),
        paddingTop(0), paddingBottom(0), paddingLeft(0), paddingRight(0),
        reserved(0) {}

  // True when at least one property flag is set
  [[nodiscard]] bool anySet() const {
    // Each flag is 0 or 1, so the sum is non-zero exactly when some flag is set
    const unsigned setCount = alignment + fontStyle + fontWeight + decoration + indent +    //
                              marginTop + marginBottom + marginLeft + marginRight +         //
                              paddingTop + paddingBottom + paddingLeft + paddingRight;
    return setCount != 0;
  }

  // Clear every property flag
  void clearAll() {
    alignment = 0;
    fontStyle = 0;
    fontWeight = 0;
    decoration = 0;
    indent = 0;
    marginTop = 0;
    marginBottom = 0;
    marginLeft = 0;
    marginRight = 0;
    paddingTop = 0;
    paddingBottom = 0;
    paddingLeft = 0;
    paddingRight = 0;
  }
};
// Represents a collection of CSS style properties
// Only stores properties relevant to e-ink text rendering
// Length values are stored as CssLength (value + unit) for deferred resolution
struct CssStyle {
TextAlign alignment = TextAlign::None;
CssFontStyle fontStyle = CssFontStyle::Normal;
CssFontWeight fontWeight = CssFontWeight::Normal;
CssTextDecoration decoration = CssTextDecoration::None;
CssLength indent; // First-line indent (deferred resolution)
CssLength marginTop; // Vertical spacing before block
CssLength marginBottom; // Vertical spacing after block
CssLength marginLeft; // Horizontal spacing left of block
CssLength marginRight; // Horizontal spacing right of block
CssLength paddingTop; // Padding before
CssLength paddingBottom; // Padding after
CssLength paddingLeft; // Padding left
CssLength paddingRight; // Padding right
CssPropertyFlags defined; // Tracks which properties were explicitly set
// Apply properties from another style, only overwriting if the other style
// has that property explicitly defined
void applyOver(const CssStyle& base) {
if (base.defined.alignment) {
alignment = base.alignment;
defined.alignment = 1;
}
if (base.defined.fontStyle) {
fontStyle = base.fontStyle;
defined.fontStyle = 1;
}
if (base.defined.fontWeight) {
fontWeight = base.fontWeight;
defined.fontWeight = 1;
}
if (base.defined.decoration) {
decoration = base.decoration;
defined.decoration = 1;
}
if (base.defined.indent) {
indent = base.indent;
defined.indent = 1;
}
if (base.defined.marginTop) {
marginTop = base.marginTop;
defined.marginTop = 1;
}
if (base.defined.marginBottom) {
marginBottom = base.marginBottom;
defined.marginBottom = 1;
}
if (base.defined.marginLeft) {
marginLeft = base.marginLeft;
defined.marginLeft = 1;
}
if (base.defined.marginRight) {
marginRight = base.marginRight;
defined.marginRight = 1;
}
if (base.defined.paddingTop) {
paddingTop = base.paddingTop;
defined.paddingTop = 1;
}
if (base.defined.paddingBottom) {
paddingBottom = base.paddingBottom;
defined.paddingBottom = 1;
}
if (base.defined.paddingLeft) {
paddingLeft = base.paddingLeft;
defined.paddingLeft = 1;
}
if (base.defined.paddingRight) {
paddingRight = base.paddingRight;
defined.paddingRight = 1;
}
}
// Compatibility accessors for existing code that uses hasX pattern
[[nodiscard]] bool hasTextAlign() const { return defined.alignment; }
[[nodiscard]] bool hasFontStyle() const { return defined.fontStyle; }
[[nodiscard]] bool hasFontWeight() const { return defined.fontWeight; }
[[nodiscard]] bool hasTextDecoration() const { return defined.decoration; }
[[nodiscard]] bool hasTextIndent() const { return defined.indent; }
[[nodiscard]] bool hasMarginTop() const { return defined.marginTop; }
[[nodiscard]] bool hasMarginBottom() const { return defined.marginBottom; }
[[nodiscard]] bool hasMarginLeft() const { return defined.marginLeft; }
[[nodiscard]] bool hasMarginRight() const { return defined.marginRight; }
[[nodiscard]] bool hasPaddingTop() const { return defined.paddingTop; }
[[nodiscard]] bool hasPaddingBottom() const { return defined.paddingBottom; }
[[nodiscard]] bool hasPaddingLeft() const { return defined.paddingLeft; }
[[nodiscard]] bool hasPaddingRight() const { return defined.paddingRight; }
// Merge another style (alias for applyOver for compatibility)
void merge(const CssStyle& other) { applyOver(other); }
void reset() {
alignment = TextAlign::None;
fontStyle = CssFontStyle::Normal;
fontWeight = CssFontWeight::Normal;
decoration = CssTextDecoration::None;
indent = CssLength{};
marginTop = marginBottom = marginLeft = marginRight = CssLength{};
paddingTop = paddingBottom = paddingLeft = paddingRight = CssLength{};
defined.clearAll();
}
};

View File

@ -22,6 +22,9 @@ constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
const char* ITALIC_TAGS[] = {"i", "em"}; const char* ITALIC_TAGS[] = {"i", "em"};
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]); constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
const char* UNDERLINE_TAGS[] = {"u", "ins"};
constexpr int NUM_UNDERLINE_TAGS = sizeof(UNDERLINE_TAGS) / sizeof(UNDERLINE_TAGS[0]);
const char* IMAGE_TAGS[] = {"img"}; const char* IMAGE_TAGS[] = {"img"};
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
@ -40,37 +43,124 @@ bool matches(const char* tag_name, const char* possible_tags[], const int possib
return false; return false;
} }
// Build a BlockStyle from parsed CSS, resolving every CssLength to whole pixels.
// emSize is the current font line height and is the basis for em/rem conversion.
// NOTE(review): marginTop/marginBottom already include the padding, which is also stored in
// paddingTop/paddingBottom - confirm consumers do not add the two together.
BlockStyle createBlockStyleFromCss(const CssStyle& cssStyle, const float emSize) {
  // Shorthand for "resolve this length against the current font"
  const auto px = [emSize](const CssLength& length) { return length.toPixelsInt16(emSize); };

  BlockStyle resolved;

  // Vertical: padding is kept on its own and also folded into the margin, so the margin
  // fields hold the full spacing above/below the block
  resolved.paddingTop = px(cssStyle.paddingTop);
  resolved.paddingBottom = px(cssStyle.paddingBottom);
  resolved.marginTop = static_cast<int16_t>(px(cssStyle.marginTop) + resolved.paddingTop);
  resolved.marginBottom = static_cast<int16_t>(px(cssStyle.marginBottom) + resolved.paddingBottom);

  // Horizontal: margin and padding stay separate for layout calculations
  resolved.marginLeft = px(cssStyle.marginLeft);
  resolved.marginRight = px(cssStyle.marginRight);
  resolved.paddingLeft = px(cssStyle.paddingLeft);
  resolved.paddingRight = px(cssStyle.paddingRight);

  // Text indent, plus whether it was explicitly set in CSS
  resolved.textIndent = px(cssStyle.indent);
  resolved.textIndentDefined = cssStyle.defined.indent;

  return resolved;
}
// Recompute effectiveBold / effectiveItalic / effectiveUnderline.
// The block-level CSS style provides the baseline; every entry on the inline
// style stack is then applied from first to last, so later entries win for
// whichever attributes they explicitly set.
void ChapterHtmlSlimParser::updateEffectiveInlineStyle() {
  const auto& block = currentBlockStyle;

  // Baseline taken from the enclosing block element
  bool bold = block.hasFontWeight() && block.fontWeight == CssFontWeight::Bold;
  bool italic = block.hasFontStyle() && block.fontStyle == CssFontStyle::Italic;
  bool underline = block.hasTextDecoration() && block.decoration == CssTextDecoration::Underline;

  // Layer the inline overrides on top, in push order
  for (const auto& layer : inlineStyleStack) {
    bold = layer.hasBold ? layer.bold : bold;
    italic = layer.hasItalic ? layer.italic : italic;
    underline = layer.hasUnderline ? layer.underline : underline;
  }

  effectiveBold = bold;
  effectiveItalic = italic;
  effectiveUnderline = underline;
}
// flush the contents of partWordBuffer to currentTextBlock // flush the contents of partWordBuffer to currentTextBlock
void ChapterHtmlSlimParser::flushPartWordBuffer() { void ChapterHtmlSlimParser::flushPartWordBuffer() {
// determine font style // Determine font style from depth-based tracking and CSS effective style
const bool isBold = boldUntilDepth < depth || effectiveBold;
const bool isItalic = italicUntilDepth < depth || effectiveItalic;
const bool isUnderline = underlineUntilDepth < depth || effectiveUnderline;
EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR; EpdFontFamily::Style fontStyle = EpdFontFamily::REGULAR;
if (boldUntilDepth < depth && italicUntilDepth < depth) { if (isBold && isItalic) {
fontStyle = EpdFontFamily::BOLD_ITALIC; fontStyle = EpdFontFamily::BOLD_ITALIC;
} else if (boldUntilDepth < depth) { } else if (isBold) {
fontStyle = EpdFontFamily::BOLD; fontStyle = EpdFontFamily::BOLD;
} else if (italicUntilDepth < depth) { } else if (isItalic) {
fontStyle = EpdFontFamily::ITALIC; fontStyle = EpdFontFamily::ITALIC;
} }
// flush the buffer // flush the buffer
partWordBuffer[partWordBufferIndex] = '\0'; partWordBuffer[partWordBufferIndex] = '\0';
currentTextBlock->addWord(partWordBuffer, fontStyle); currentTextBlock->addWord(partWordBuffer, fontStyle, isUnderline);
partWordBufferIndex = 0; partWordBufferIndex = 0;
} }
// Merge block styles for nested block elements.
// When a child block element is inside a parent with no direct text content,
// we accumulate their margins so nested containers properly contribute spacing.
//
// parent: style already attached to the (still empty) text block.
// child:  style of the block element that is now being opened.
//
// Returns a BlockStyle whose margins and paddings are the parent + child sums
// (clamped to the int16_t range) and whose text indent is the child's if the
// child defines one, otherwise the parent's if defined, otherwise left at the
// BlockStyle default.
BlockStyle mergeBlockStyles(const BlockStyle& parent, const BlockStyle& child) {
  // Sum in int and clamp: repeated merges of deeply nested containers must not
  // wrap around to a negative value, which would make the spacing vanish.
  const auto addClamped = [](const int a, const int b) {
    return static_cast<int16_t>(std::max<int>(INT16_MIN, std::min<int>(INT16_MAX, a + b)));
  };

  BlockStyle merged;

  // Vertical margins: sum them (nested blocks create additive spacing)
  merged.marginTop = addClamped(parent.marginTop, child.marginTop);
  merged.marginBottom = addClamped(parent.marginBottom, child.marginBottom);

  // Horizontal margins: sum them (nested blocks create cumulative indentation)
  merged.marginLeft = addClamped(parent.marginLeft, child.marginLeft);
  merged.marginRight = addClamped(parent.marginRight, child.marginRight);

  // Padding: sum them
  merged.paddingTop = addClamped(parent.paddingTop, child.paddingTop);
  merged.paddingBottom = addClamped(parent.paddingBottom, child.paddingBottom);
  merged.paddingLeft = addClamped(parent.paddingLeft, child.paddingLeft);
  merged.paddingRight = addClamped(parent.paddingRight, child.paddingRight);

  // Text indent: use child's if defined, otherwise inherit parent's
  if (child.textIndentDefined) {
    merged.textIndent = child.textIndent;
    merged.textIndentDefined = true;
  } else if (parent.textIndentDefined) {
    merged.textIndent = parent.textIndent;
    merged.textIndentDefined = true;
  }

  return merged;
}
// start a new text block if needed // start a new text block if needed
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style, const BlockStyle& blockStyle) {
if (currentTextBlock) { if (currentTextBlock) {
// already have a text block running and it is empty - just reuse it // already have a text block running and it is empty - just reuse it
if (currentTextBlock->isEmpty()) { if (currentTextBlock->isEmpty()) {
currentTextBlock->setStyle(style); currentTextBlock->setStyle(style);
// Merge with existing block style to accumulate margins from parent block elements
// This handles cases like <div margin-bottom:2em><h1>text</h1></div> where the
// div's margin should be preserved even though it has no direct text content
const BlockStyle merged = mergeBlockStyles(currentTextBlock->getBlockStyle(), blockStyle);
currentTextBlock->setBlockStyle(merged);
return; return;
} }
makePages(); makePages();
} }
currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled)); currentTextBlock.reset(new ParsedText(style, extraParagraphSpacing, hyphenationEnabled, blockStyle));
} }
// Convenience overload: start (or reuse) a text block with the given alignment style
// and a default-constructed BlockStyle, by forwarding to the two-argument overload.
void ChapterHtmlSlimParser::startNewTextBlock(const TextBlock::Style style) { startNewTextBlock(style, BlockStyle{}); }
void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) { void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char* name, const XML_Char** atts) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
@ -80,6 +170,19 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
return; return;
} }
// Extract class and style attributes for CSS processing
std::string classAttr;
std::string styleAttr;
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "class") == 0) {
classAttr = atts[i + 1];
} else if (strcmp(atts[i], "style") == 0) {
styleAttr = atts[i + 1];
}
}
}
// Special handling for tables - show placeholder text instead of dropping silently // Special handling for tables - show placeholder text instead of dropping silently
if (strcmp(name, "table") == 0) { if (strcmp(name, "table") == 0) {
// Add placeholder text // Add placeholder text
@ -141,43 +244,155 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
} }
} }
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { // Determine if this is a block element
self->startNewTextBlock(TextBlock::CENTER_ALIGN); bool isBlockElement = matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS);
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->depth += 1; // Compute CSS style for this element
return; CssStyle cssStyle;
if (self->cssParser) {
// Get combined tag + class styles
cssStyle = self->cssParser->resolveStyle(name, classAttr);
// Merge inline style (highest priority)
if (!styleAttr.empty()) {
CssStyle inlineStyle = CssParser::parseInlineStyle(styleAttr);
cssStyle.merge(inlineStyle);
}
} }
if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) { if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
// Headers: center aligned, bold, apply CSS overrides
TextBlock::Style alignment = TextBlock::CENTER_ALIGN;
if (cssStyle.hasTextAlign()) {
switch (cssStyle.alignment) {
case TextAlign::Left:
alignment = TextBlock::LEFT_ALIGN;
break;
case TextAlign::Right:
alignment = TextBlock::RIGHT_ALIGN;
break;
case TextAlign::Center:
alignment = TextBlock::CENTER_ALIGN;
break;
case TextAlign::Justify:
alignment = TextBlock::JUSTIFIED;
break;
default:
break;
}
}
self->currentBlockStyle = cssStyle;
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId)));
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->updateEffectiveInlineStyle();
} else if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
if (strcmp(name, "br") == 0) { if (strcmp(name, "br") == 0) {
if (self->partWordBufferIndex > 0) { if (self->partWordBufferIndex > 0) {
// flush word preceding <br/> to currentTextBlock before calling startNewTextBlock // flush word preceding <br/> to currentTextBlock before calling startNewTextBlock
self->flushPartWordBuffer(); self->flushPartWordBuffer();
} }
self->startNewTextBlock(self->currentTextBlock->getStyle()); self->startNewTextBlock(self->currentTextBlock->getStyle());
self->depth += 1; } else {
return; // Determine alignment from CSS or default
auto alignment = static_cast<TextBlock::Style>(self->paragraphAlignment);
if (cssStyle.hasTextAlign()) {
switch (cssStyle.alignment) {
case TextAlign::Left:
alignment = TextBlock::LEFT_ALIGN;
break;
case TextAlign::Right:
alignment = TextBlock::RIGHT_ALIGN;
break;
case TextAlign::Center:
alignment = TextBlock::CENTER_ALIGN;
break;
case TextAlign::Justify:
alignment = TextBlock::JUSTIFIED;
break;
default:
break;
}
}
self->currentBlockStyle = cssStyle;
self->startNewTextBlock(alignment, createBlockStyleFromCss(cssStyle, self->renderer.getLineHeight(self->fontId)));
self->updateEffectiveInlineStyle();
if (strcmp(name, "li") == 0) {
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR);
}
} }
} else if (matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS)) {
self->startNewTextBlock(static_cast<TextBlock::Style>(self->paragraphAlignment)); self->underlineUntilDepth = std::min(self->underlineUntilDepth, self->depth);
if (strcmp(name, "li") == 0) { // Push inline style entry for underline tag
self->currentTextBlock->addWord("\xe2\x80\xa2", EpdFontFamily::REGULAR); StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasUnderline = true;
entry.underline = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
} }
if (cssStyle.hasFontStyle()) {
self->depth += 1; entry.hasItalic = true;
return; entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
} }
self->inlineStyleStack.push_back(entry);
if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) { self->updateEffectiveInlineStyle();
} else if (matches(name, BOLD_TAGS, NUM_BOLD_TAGS)) {
self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth); self->boldUntilDepth = std::min(self->boldUntilDepth, self->depth);
self->depth += 1; // Push inline style entry for bold tag
return; StyleStackEntry entry;
} entry.depth = self->depth; // Track depth for matching pop
entry.hasBold = true;
if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) { entry.bold = true;
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth); self->italicUntilDepth = std::min(self->italicUntilDepth, self->depth);
self->depth += 1; // Push inline style entry for italic tag
return; StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
entry.hasItalic = true;
entry.italic = true;
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
} else if (strcmp(name, "span") == 0 || !isBlockElement) {
// Handle span and other inline elements for CSS styling
if (cssStyle.hasFontWeight() || cssStyle.hasFontStyle() || cssStyle.hasTextDecoration()) {
StyleStackEntry entry;
entry.depth = self->depth; // Track depth for matching pop
if (cssStyle.hasFontWeight()) {
entry.hasBold = true;
entry.bold = cssStyle.fontWeight == CssFontWeight::Bold;
}
if (cssStyle.hasFontStyle()) {
entry.hasItalic = true;
entry.italic = cssStyle.fontStyle == CssFontStyle::Italic;
}
if (cssStyle.hasTextDecoration()) {
entry.hasUnderline = true;
entry.underline = cssStyle.decoration == CssTextDecoration::Underline;
}
self->inlineStyleStack.push_back(entry);
self->updateEffectiveInlineStyle();
}
} }
// Unprocessed tag, just increasing depth and continue forward // Unprocessed tag, just increasing depth and continue forward
@ -239,17 +454,27 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) { void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* name) {
auto* self = static_cast<ChapterHtmlSlimParser*>(userData); auto* self = static_cast<ChapterHtmlSlimParser*>(userData);
if (self->partWordBufferIndex > 0) { // Check if any style state will change after we decrement depth
// Only flush out part word buffer if we're closing a block tag or are at the top of the HTML file. // If so, we MUST flush the partWordBuffer with the CURRENT style first
// We don't want to flush out content when closing inline tags like <span>. // Note: depth hasn't been decremented yet, so we check against (depth - 1)
// Currently this also flushes out on closing <b> and <i> tags, but they are line tags so that shouldn't happen, const bool willPopStyleStack =
// text styling needs to be overhauled to fix it. !self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth - 1;
const bool shouldBreakText = const bool willClearBold = self->boldUntilDepth == self->depth - 1;
matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || const bool willClearItalic = self->italicUntilDepth == self->depth - 1;
matches(name, BOLD_TAGS, NUM_BOLD_TAGS) || matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) || const bool willClearUnderline = self->underlineUntilDepth == self->depth - 1;
strcmp(name, "table") == 0 || matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
if (shouldBreakText) { const bool styleWillChange = willPopStyleStack || willClearBold || willClearItalic || willClearUnderline;
// Flush buffer with current style BEFORE any style changes
if (self->partWordBufferIndex > 0) {
// Flush if style will change OR if we're closing a block/structural element
const bool shouldFlush = styleWillChange || matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) ||
matches(name, HEADER_TAGS, NUM_HEADER_TAGS) || matches(name, BOLD_TAGS, NUM_BOLD_TAGS) ||
matches(name, ITALIC_TAGS, NUM_ITALIC_TAGS) ||
matches(name, UNDERLINE_TAGS, NUM_UNDERLINE_TAGS) || strcmp(name, "table") == 0 ||
matches(name, IMAGE_TAGS, NUM_IMAGE_TAGS) || self->depth == 1;
if (shouldFlush) {
self->flushPartWordBuffer(); self->flushPartWordBuffer();
} }
} }
@ -261,15 +486,33 @@ void XMLCALL ChapterHtmlSlimParser::endElement(void* userData, const XML_Char* n
self->skipUntilDepth = INT_MAX; self->skipUntilDepth = INT_MAX;
} }
// Leaving bold // Leaving bold tag
if (self->boldUntilDepth == self->depth) { if (self->boldUntilDepth == self->depth) {
self->boldUntilDepth = INT_MAX; self->boldUntilDepth = INT_MAX;
} }
// Leaving italic // Leaving italic tag
if (self->italicUntilDepth == self->depth) { if (self->italicUntilDepth == self->depth) {
self->italicUntilDepth = INT_MAX; self->italicUntilDepth = INT_MAX;
} }
// Leaving underline tag
if (self->underlineUntilDepth == self->depth) {
self->underlineUntilDepth = INT_MAX;
}
// Pop from inline style stack if we pushed an entry at this depth
// This handles all inline elements: b, i, u, span, etc.
if (!self->inlineStyleStack.empty() && self->inlineStyleStack.back().depth == self->depth) {
self->inlineStyleStack.pop_back();
self->updateEffectiveInlineStyle();
}
// Clear block style when leaving block elements
if (matches(name, BLOCK_TAGS, NUM_BLOCK_TAGS) || matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->currentBlockStyle.reset();
self->updateEffectiveInlineStyle();
}
} }
bool ChapterHtmlSlimParser::parseAndBuildPages() { bool ChapterHtmlSlimParser::parseAndBuildPages() {
@ -373,7 +616,9 @@ void ChapterHtmlSlimParser::addLineToPage(std::shared_ptr<TextBlock> line) {
currentPageNextY = 0; currentPageNextY = 0;
} }
currentPage->elements.push_back(std::make_shared<PageLine>(line, 0, currentPageNextY)); // Apply horizontal left inset (margin + padding) as x position offset
const int16_t xOffset = line->getBlockStyle().leftInset();
currentPage->elements.push_back(std::make_shared<PageLine>(line, xOffset, currentPageNextY));
currentPageNextY += lineHeight; currentPageNextY += lineHeight;
} }
@ -389,10 +634,28 @@ void ChapterHtmlSlimParser::makePages() {
} }
const int lineHeight = renderer.getLineHeight(fontId) * lineCompression; const int lineHeight = renderer.getLineHeight(fontId) * lineCompression;
// Apply marginTop before the paragraph (stored in pixels)
const BlockStyle& blockStyle = currentTextBlock->getBlockStyle();
if (blockStyle.marginTop > 0) {
currentPageNextY += blockStyle.marginTop;
}
// Calculate effective width accounting for horizontal margins/padding
const int horizontalInset = blockStyle.totalHorizontalInset();
const uint16_t effectiveWidth =
(horizontalInset < viewportWidth) ? static_cast<uint16_t>(viewportWidth - horizontalInset) : viewportWidth;
currentTextBlock->layoutAndExtractLines( currentTextBlock->layoutAndExtractLines(
renderer, fontId, viewportWidth, renderer, fontId, effectiveWidth,
[this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); }); [this](const std::shared_ptr<TextBlock>& textBlock) { addLineToPage(textBlock); });
// Extra paragraph spacing if enabled
// Apply marginBottom after the paragraph (stored in pixels)
if (blockStyle.marginBottom > 0) {
currentPageNextY += blockStyle.marginBottom;
}
// Extra paragraph spacing if enabled (default behavior)
if (extraParagraphSpacing) { if (extraParagraphSpacing) {
currentPageNextY += lineHeight / 2; currentPageNextY += lineHeight / 2;
} }

View File

@ -8,6 +8,8 @@
#include "../ParsedText.h" #include "../ParsedText.h"
#include "../blocks/TextBlock.h" #include "../blocks/TextBlock.h"
#include "../css/CssParser.h"
#include "../css/CssStyle.h"
class Page; class Page;
class GfxRenderer; class GfxRenderer;
@ -23,6 +25,7 @@ class ChapterHtmlSlimParser {
int skipUntilDepth = INT_MAX; int skipUntilDepth = INT_MAX;
int boldUntilDepth = INT_MAX; int boldUntilDepth = INT_MAX;
int italicUntilDepth = INT_MAX; int italicUntilDepth = INT_MAX;
int underlineUntilDepth = INT_MAX;
// buffer for building up words from characters, will auto break if longer than this // buffer for building up words from characters, will auto break if longer than this
// leave one char at end for null pointer // leave one char at end for null pointer
char partWordBuffer[MAX_WORD_SIZE + 1] = {}; char partWordBuffer[MAX_WORD_SIZE + 1] = {};
@ -37,7 +40,23 @@ class ChapterHtmlSlimParser {
uint16_t viewportWidth; uint16_t viewportWidth;
uint16_t viewportHeight; uint16_t viewportHeight;
bool hyphenationEnabled; bool hyphenationEnabled;
const CssParser* cssParser;
// Inline style tracking via an explicit stack. This is used alongside the
// *UntilDepth counters above rather than instead of them: flushPartWordBuffer()
// treats text as bold/italic/underlined when either mechanism says so.
struct StyleStackEntry {
// Element depth at which this entry was pushed; endElement pops the top entry
// when it closes the element at that same depth
int depth = 0;
// For each attribute: has* tells whether this entry overrides the attribute at all,
// and the second flag is the value to apply when it does
bool hasBold = false, bold = false;
bool hasItalic = false, italic = false;
bool hasUnderline = false, underline = false;
};
std::vector<StyleStackEntry> inlineStyleStack;
CssStyle currentBlockStyle;
bool effectiveBold = false;
bool effectiveItalic = false;
bool effectiveUnderline = false;
void updateEffectiveInlineStyle();
void startNewTextBlock(TextBlock::Style style, const BlockStyle& blockStyle);
void startNewTextBlock(TextBlock::Style style); void startNewTextBlock(TextBlock::Style style);
void flushPartWordBuffer(); void flushPartWordBuffer();
void makePages(); void makePages();
@ -52,7 +71,8 @@ class ChapterHtmlSlimParser {
const uint8_t paragraphAlignment, const uint16_t viewportWidth, const uint8_t paragraphAlignment, const uint16_t viewportWidth,
const uint16_t viewportHeight, const bool hyphenationEnabled, const uint16_t viewportHeight, const bool hyphenationEnabled,
const std::function<void(std::unique_ptr<Page>)>& completePageFn, const std::function<void(std::unique_ptr<Page>)>& completePageFn,
const std::function<void(int)>& progressFn = nullptr) const std::function<void(int)>& progressFn = nullptr,
const CssParser* cssParser = nullptr)
: filepath(filepath), : filepath(filepath),
renderer(renderer), renderer(renderer),
fontId(fontId), fontId(fontId),
@ -63,7 +83,8 @@ class ChapterHtmlSlimParser {
viewportHeight(viewportHeight), viewportHeight(viewportHeight),
hyphenationEnabled(hyphenationEnabled), hyphenationEnabled(hyphenationEnabled),
completePageFn(completePageFn), completePageFn(completePageFn),
progressFn(progressFn) {} progressFn(progressFn),
cssParser(cssParser) {}
~ChapterHtmlSlimParser() = default; ~ChapterHtmlSlimParser() = default;
bool parseAndBuildPages(); bool parseAndBuildPages();
void addLineToPage(std::shared_ptr<TextBlock> line); void addLineToPage(std::shared_ptr<TextBlock> line);

View File

@ -8,6 +8,7 @@
namespace { namespace {
constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml"; constexpr char MEDIA_TYPE_NCX[] = "application/x-dtbncx+xml";
constexpr char MEDIA_TYPE_CSS[] = "text/css";
constexpr char itemCacheFile[] = "/.items.bin"; constexpr char itemCacheFile[] = "/.items.bin";
} // namespace } // namespace
@ -218,6 +219,11 @@ void XMLCALL ContentOpfParser::startElement(void* userData, const XML_Char* name
} }
} }
// Collect CSS files
if (mediaType == MEDIA_TYPE_CSS) {
self->cssFiles.push_back(href);
}
// EPUB 3: Check for nav document (properties contains "nav") // EPUB 3: Check for nav document (properties contains "nav")
if (!properties.empty() && self->tocNavPath.empty()) { if (!properties.empty() && self->tocNavPath.empty()) {
// Properties is space-separated, check if "nav" is present as a word // Properties is space-separated, check if "nav" is present as a word

View File

@ -64,6 +64,7 @@ class ContentOpfParser final : public Print {
std::string tocNavPath; // EPUB 3 nav document path std::string tocNavPath; // EPUB 3 nav document path
std::string coverItemHref; std::string coverItemHref;
std::string textReferenceHref; std::string textReferenceHref;
std::vector<std::string> cssFiles; // CSS stylesheet paths
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize, explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
BookMetadataCache* cache) BookMetadataCache* cache)

View File

@ -462,6 +462,20 @@ int GfxRenderer::getSpaceWidth(const int fontId) const {
return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX; return fontMap.at(fontId).getGlyph(' ', EpdFontFamily::REGULAR)->advanceX;
} }
// Measure the horizontal advance of `text` in the REGULAR style of the given font.
//
// fontId: key into fontMap; an unknown id is logged and yields 0.
// text:   NUL-terminated UTF-8 string, decoded with utf8NextCodepoint; a null
//         pointer is treated as empty text.
//
// Returns the sum of advanceX over all glyphs of the text. Codepoints for which
// getGlyph yields no glyph contribute nothing instead of being dereferenced.
int GfxRenderer::getIndentWidth(const int fontId, const char* text) const {
  // Single lookup instead of count() followed by at() for every codepoint
  const auto fontIt = fontMap.find(fontId);
  if (fontIt == fontMap.end()) {
    Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);
    return 0;
  }
  if (text == nullptr) {
    return 0;
  }
  const auto& font = fontIt->second;

  int width = 0;
  uint32_t cp;
  // utf8NextCodepoint advances `text` and returns 0 at the terminating NUL
  while ((cp = utf8NextCodepoint(reinterpret_cast<const uint8_t**>(&text)))) {
    const auto glyph = font.getGlyph(cp, EpdFontFamily::REGULAR);
    if (glyph) {
      width += glyph->advanceX;
    }
  }
  return width;
}
int GfxRenderer::getFontAscenderSize(const int fontId) const { int GfxRenderer::getFontAscenderSize(const int fontId) const {
if (fontMap.count(fontId) == 0) { if (fontMap.count(fontId) == 0) {
Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId); Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);

View File

@ -78,6 +78,7 @@ class GfxRenderer {
void drawText(int fontId, int x, int y, const char* text, bool black = true, void drawText(int fontId, int x, int y, const char* text, bool black = true,
EpdFontFamily::Style style = EpdFontFamily::REGULAR) const; EpdFontFamily::Style style = EpdFontFamily::REGULAR) const;
int getSpaceWidth(int fontId) const; int getSpaceWidth(int fontId) const;
int getIndentWidth(int fontId, const char* text) const;
int getFontAscenderSize(int fontId) const; int getFontAscenderSize(int fontId) const;
int getLineHeight(int fontId) const; int getLineHeight(int fontId) const;
std::string truncatedText(int fontId, const char* text, int maxWidth, std::string truncatedText(int fontId, const char* text, int maxWidth,