Fix typographic rendering issues: precise advance widths and detached punctuation

2026-02-04 22:57:50 +03:00 · 2026-01-22 18:42:45 -05:00 · 2026-01-22 18:42:45 -05:00 · cf57ad0ca9
commit cf57ad0ca9
parent 155914f004
12 changed files with 86 additions and 14 deletions
--- a/docs/CHANGES.md
+++ b/docs/CHANGES.md
@ -80,3 +80,9 @@ The EPUB reader core was modified to improve stability, performance, and memory
 *   **V0 Format Fix**: Fixed a regression in V0 font loading where the header read was truncated to 32 bytes (instead of 48), restoring support for `LibreBaskerville` and other legacy fonts.
 *   **Flexible Discovery**: Updated `FontManager` to support `Family_Style_Size` (underscore-separated) naming conventions, enabling compatibility with a wider range of auto-generated filenames.
 *   **Documentation**: Rewrote `FONT_CONVERSION.md` to cover both the Python script and the new web converter.
+
+### Update: Typographic Rendering Improvements (2026-01-22)
+
+*   **Precise Character Spacing**: Implemented `getTextAdvance` to use typographic advance widths instead of visual bounding boxes for layout. This fixes clipping issues with characters like em-dashes.
+*   **Punctuation Attachment**: Added logic to `ParsedText` to "attach" words that begin with trailing punctuation (`.`, `,`, `;`, `:`, `!`, `?`) or a closing bracket (`)`, `]`, `}`) to the preceding word, ensuring no visual gap appears between the word and the punctuation mark, even when line breaking occurs.
+*   **Font Converter Precision**: Updated `lib/EpdFont/scripts/fontconvert.py` to round advance widths to the nearest pixel instead of flooring them, and fixed a bug that wrote the glyph bitmap data twice when generating binary `.epdfont` files, resulting in higher quality generated fonts.
--- a/docs/FONT_CONVERSION.md
+++ b/docs/FONT_CONVERSION.md
@ -34,9 +34,9 @@ For best results, rename your downloaded file to match one of these patterns:

 **Note:** If you download a file named just `Aileron.epdfont`, the reader will try to load it, but using the explicit naming convention above ensures the correct style and size are recognized.

-## Method 2: Python Script (Legacy)
+## Method 2: Python Script (Improved)

-You can also use the included Python script located at `lib/EpdFont/scripts/fontconvert.py`.
+You can also use the included Python script located at `lib/EpdFont/scripts/fontconvert.py`. As of the 2026-01-22 update, the script rounds glyph advance widths to the nearest pixel instead of flooring them, which fixes spacing issues (most visibly around em-dashes).

 ### Requirements
 - Python 3
--- a/lib/EpdFont/EpdFont.cpp
+++ b/lib/EpdFont/EpdFont.cpp
@ -47,6 +47,25 @@ void EpdFont::getTextDimensions(const char* string, int* w, int* h, const EpdFon
  *h = maxY - minY;
 }

+/**
+ * Sums the horizontal advance (advanceX) of every codepoint in a UTF-8 string.
+ *
+ * A codepoint with no glyph for `style` is measured with the '?' glyph instead;
+ * if that is missing as well, the codepoint contributes nothing.
+ *
+ * @param string UTF-8 text to measure; nullptr or an empty string yields 0.
+ * @param style  Font style passed through to getGlyph().
+ * @return Total advance accumulated over the string.
+ */
+int EpdFont::getTextAdvance(const char* string, const EpdFontStyles::Style style) const {
+  if (!string || string[0] == '\0') {
+    return 0;
+  }
+
+  // utf8NextCodepoint() moves the pointer it is handed, so decode through a cursor
+  // that aliases the local `string` parameter.
+  const uint8_t** cursor = reinterpret_cast<const uint8_t**>(&string);
+
+  int total = 0;
+  for (uint32_t codepoint = utf8NextCodepoint(cursor); codepoint != 0; codepoint = utf8NextCodepoint(cursor)) {
+    const EpdGlyph* g = getGlyph(codepoint, style);
+    if (g == nullptr) {
+      // Fall back to the placeholder glyph for unsupported codepoints.
+      g = getGlyph('?', style);
+    }
+    if (g == nullptr) {
+      continue;
+    }
+    total += g->advanceX;
+  }
+  return total;
+}
+
 bool EpdFont::hasPrintableChars(const char* string, const EpdFontStyles::Style style) const {
  int w = 0, h = 0;

--- a/lib/EpdFont/EpdFont.h
+++ b/lib/EpdFont/EpdFont.h
@ -14,6 +14,7 @@ class EpdFont {

  void getTextDimensions(const char* string, int* w, int* h,
                         const EpdFontStyles::Style style = EpdFontStyles::REGULAR) const;
+  int getTextAdvance(const char* string, const EpdFontStyles::Style style = EpdFontStyles::REGULAR) const;
  bool hasPrintableChars(const char* string, const EpdFontStyles::Style style = EpdFontStyles::REGULAR) const;

  virtual const EpdGlyph* getGlyph(uint32_t cp, const EpdFontStyles::Style style = EpdFontStyles::REGULAR) const;
--- a/lib/EpdFont/EpdFontFamily.cpp
+++ b/lib/EpdFont/EpdFontFamily.cpp
@ -26,6 +26,10 @@ void EpdFontFamily::getTextDimensions(const char* string, int* w, int* h, const
  getFont(style)->getTextDimensions(string, w, h, style);
 }

+// Measures `string` with whichever font getFont() returns for `style`,
+// forwarding to that font's getTextAdvance().
+int EpdFontFamily::getTextAdvance(const char* string, const Style style) const {
+  auto&& font = getFont(style);
+  return font->getTextAdvance(string, style);
+}
+
 bool EpdFontFamily::hasPrintableChars(const char* string, const Style style) const {
  return getFont(style)->hasPrintableChars(string, style);
 }
--- a/lib/EpdFont/EpdFontFamily.h
+++ b/lib/EpdFont/EpdFontFamily.h
@ -16,6 +16,7 @@ class EpdFontFamily {
      : regular(regular), bold(bold), italic(italic), boldItalic(boldItalic) {}
  ~EpdFontFamily() = default;
  void getTextDimensions(const char* string, int* w, int* h, Style style = EpdFontStyles::REGULAR) const;
+  int getTextAdvance(const char* string, Style style = EpdFontStyles::REGULAR) const;
  bool hasPrintableChars(const char* string, Style style = EpdFontStyles::REGULAR) const;
  const EpdFontData* getData(Style style = EpdFontStyles::REGULAR) const;
  const EpdGlyph* getGlyph(uint32_t cp, Style style = EpdFontStyles::REGULAR) const;
--- a/lib/EpdFont/scripts/fontconvert.py
+++ b/lib/EpdFont/scripts/fontconvert.py
@ -74,6 +74,9 @@ def norm_floor(val):
 def norm_ceil(val):
    return int(math.ceil(val / (1 << 6)))

+def norm_round(val):
+    return int(round(val / 64.0))
+
 def chunks(l, n):
    for i in range(0, len(l), n):
        yield l[i:i + n]
@ -188,7 +191,7 @@ for i_start, i_end in intervals:
        glyph = GlyphProps(
            width = bitmap.width,
            height = bitmap.rows,
-            advance_x = norm_floor(face.glyph.advance.x),
+            advance_x = norm_round(face.glyph.advance.x),
            left = face.glyph.bitmap_left,
            top = face.glyph.bitmap_top,
            data_length = len(packed),
@ -265,8 +268,6 @@ if isBinary:
        
        # Bitmaps
        f.write(bytes(glyph_data))
-        # Bitmaps
-        f.write(bytes(glyph_data))
    print(f"Generated {font_name}.epdfont")
 else:
    print(f"/**\n * generated by fontconvert.py\n * name: {font_name}\n * size: {size}\n * mode: {'2-bit' if is2Bit else '1-bit'}\n */")
--- a/lib/Epub/Epub/ParsedText.cpp
+++ b/lib/Epub/Epub/ParsedText.cpp
@ -8,6 +8,13 @@
 #include <limits>
 #include <vector>

+// Reports whether `word` begins with ASCII punctuation that should be set flush
+// against the previous word: sentence punctuation (. , ; : ! ?) or a closing
+// bracket ( ) ] } ). Only the first byte is examined; an empty word never attaches.
+// Declared static so this file-local helper has internal linkage.
+static bool isAttachedPunctuation(const std::string& word) {
+  if (word.empty()) return false;
+  switch (word.front()) {
+    case '.':
+    case ',':
+    case ';':
+    case ':':
+    case '!':
+    case '?':
+    case ')':
+    case ']':
+    case '}':
+      return true;
+    default:
+      return false;
+  }
+}
+
 constexpr int MAX_COST = std::numeric_limits<int>::max();

 void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle) {
@ -29,11 +36,18 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
  const int pageWidth = viewportWidth;
  const int spaceWidth = renderer.getSpaceWidth(fontId);
  const auto wordWidths = calculateWordWidths(renderer, fontId);
-  const auto lineBreakIndices = computeLineBreaks(pageWidth, spaceWidth, wordWidths);
+
+  std::vector<bool> attachToPrevious;
+  attachToPrevious.reserve(words.size());
+  for (const auto& w : words) {
+    attachToPrevious.push_back(isAttachedPunctuation(w));
+  }
+
+  const auto lineBreakIndices = computeLineBreaks(pageWidth, spaceWidth, wordWidths, attachToPrevious);
  const size_t lineCount = includeLastLine ? lineBreakIndices.size() : lineBreakIndices.size() - 1;

  for (size_t i = 0; i < lineCount; ++i) {
-    extractLine(i, pageWidth, spaceWidth, wordWidths, lineBreakIndices, processLine);
+    extractLine(i, pageWidth, spaceWidth, wordWidths, lineBreakIndices, attachToPrevious, processLine);
  }
 }

@ -53,7 +67,7 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
  auto wordStylesIt = wordStyles.begin();

  while (wordsIt != words.end()) {
-    wordWidths.push_back(renderer.getTextWidth(fontId, wordsIt->c_str(), *wordStylesIt));
+    wordWidths.push_back(renderer.getTextAdvance(fontId, wordsIt->c_str(), *wordStylesIt));

    std::advance(wordsIt, 1);
    std::advance(wordStylesIt, 1);
@ -63,7 +77,8 @@ std::vector<uint16_t> ParsedText::calculateWordWidths(const GfxRenderer& rendere
 }

 std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int spaceWidth,
-                                                  const std::vector<uint16_t>& wordWidths) const {
+                                                  const std::vector<uint16_t>& wordWidths,
+                                                  const std::vector<bool>& attachToPrevious) const {
  const size_t totalWordCount = words.size();

  // DP table to store the minimum badness (cost) of lines starting at index i
@ -81,7 +96,9 @@ std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int

    for (size_t j = i; j < totalWordCount; ++j) {
      // Current line length: previous width + space + current word width
-      currlen += wordWidths[j] + spaceWidth;
+      // Don't add space if the current word attaches to the previous one
+      const int gap = (j > i && attachToPrevious[j]) ? 0 : spaceWidth;
+      currlen += wordWidths[j] + gap;

      if (currlen > pageWidth) {
        break;
@ -143,6 +160,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const int pageWidth, const int

 void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const int spaceWidth,
                             const std::vector<uint16_t>& wordWidths, const std::vector<size_t>& lineBreakIndices,
+                             const std::vector<bool>& attachToPrevious,
                             const std::function<void(std::shared_ptr<TextBlock>)>& processLine) {
  const size_t lineBreak = lineBreakIndices[breakIndex];
  const size_t lastBreakAt = breakIndex > 0 ? lineBreakIndices[breakIndex - 1] : 0;
@ -161,7 +179,13 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
  const bool isLastLine = breakIndex == lineBreakIndices.size() - 1;

  if (style == TextBlock::JUSTIFIED && !isLastLine && lineWordCount >= 2) {
-    spacing = spareSpace / (lineWordCount - 1);
+    int gaps = 0;
+    for (size_t i = lastBreakAt + 1; i < lineBreak; i++) {
+      if (!attachToPrevious[i]) gaps++;
+    }
+    if (gaps > 0) {
+      spacing = spareSpace / gaps;
+    }
  }

  // Calculate initial x position
@ -175,6 +199,11 @@ void ParsedText::extractLine(const size_t breakIndex, const int pageWidth, const
  // Pre-calculate X positions for words
  std::list<uint16_t> lineXPos;
  for (size_t i = lastBreakAt; i < lineBreak; i++) {
+    // If this word attaches to previous, remove the spacing added by the previous iteration
+    if (i > lastBreakAt && attachToPrevious[i]) {
+      xpos -= spacing;
+    }
+
    const uint16_t currentWordWidth = wordWidths[i];
    lineXPos.push_back(xpos);
    xpos += currentWordWidth + spacing;
--- a/lib/Epub/Epub/ParsedText.h
+++ b/lib/Epub/Epub/ParsedText.h
@ -18,9 +18,10 @@ class ParsedText {
  TextBlock::Style style;
  bool extraParagraphSpacing;

-  std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths) const;
+  std::vector<size_t> computeLineBreaks(int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
+                                        const std::vector<bool>& attachToPrevious) const;
  void extractLine(size_t breakIndex, int pageWidth, int spaceWidth, const std::vector<uint16_t>& wordWidths,
-                   const std::vector<size_t>& lineBreakIndices,
+                   const std::vector<size_t>& lineBreakIndices, const std::vector<bool>& attachToPrevious,
                   const std::function<void(std::shared_ptr<TextBlock>)>& processLine);
  std::vector<uint16_t> calculateWordWidths(const GfxRenderer& renderer, int fontId);

--- a/lib/Epub/Epub/Section.cpp
+++ b/lib/Epub/Epub/Section.cpp
@ -7,7 +7,7 @@
 #include "parsers/ChapterHtmlSlimParser.h"

 namespace {
-constexpr uint8_t SECTION_FILE_VERSION = 10;
+constexpr uint8_t SECTION_FILE_VERSION = 12;
 constexpr uint32_t HEADER_SIZE = sizeof(uint8_t) + sizeof(int) + sizeof(float) + sizeof(bool) + sizeof(uint8_t) +
                                 sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t);
 }  // namespace
--- a/lib/GfxRenderer/GfxRenderer.cpp
+++ b/lib/GfxRenderer/GfxRenderer.cpp
@ -83,6 +83,15 @@ int GfxRenderer::getTextWidth(const int fontId, const char* text, const EpdFontF
  return w;
 }

+// Returns the advance of `text` as reported by the font registered under `fontId`.
+// When no such font is registered, logs the missing id and returns 0.
+int GfxRenderer::getTextAdvance(const int fontId, const char* text, const EpdFontFamily::Style style) const {
+  const bool fontKnown = fontMap.count(fontId) != 0;
+  if (fontKnown) {
+    return fontMap.at(fontId).getTextAdvance(text, style);
+  }
+
+  Serial.printf("[%lu] [GFX] Font %d not found\n", millis(), fontId);
+  return 0;
+}
+
 void GfxRenderer::drawCenteredText(const int fontId, const int y, const char* text, const bool black,
                                   const EpdFontFamily::Style style) const {
  const int x = (getScreenWidth() - getTextWidth(fontId, text, style)) / 2;
--- a/lib/GfxRenderer/GfxRenderer.h
+++ b/lib/GfxRenderer/GfxRenderer.h
@ -76,6 +76,7 @@ class GfxRenderer {

  // Text
  int getTextWidth(int fontId, const char* text, EpdFontFamily::Style style = EpdFontStyles::REGULAR) const;
+  int getTextAdvance(int fontId, const char* text, EpdFontFamily::Style style = EpdFontStyles::REGULAR) const;
  void drawCenteredText(int fontId, int y, const char* text, bool black = true,
                        EpdFontFamily::Style style = EpdFontStyles::REGULAR) const;
  void drawText(int fontId, int x, int y, const char* text, bool black = true,