Xteink-X4-crosspoint-reader/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.h
Arthur Tazhitdinov 170e16033e feat: dict based Hyphenation (#305)
* Adds (optional) Hyphenation for English, French, German, Russian
languages

* Included hyphenation dictionaries add approximately 280kb to the flash
usage (German alone takes 200kb)
* Trie encoded dictionaries are adopted from hypher project
(https://github.com/typst/hypher)
* Soft hyphens (and other explicit hyphens) take precedence over
dict-based hyphenation. Overall, the hyphenation rules are quite
aggressive, as I believe it makes more sense on our smaller screen.

---------

Co-authored-by: Dave Allie <dave@daveallie.com>
2026-01-25 10:34:46 -08:00

70 lines
2.5 KiB
C++

#pragma once
#include <expat.h>
#include <climits>
#include <functional>
#include <memory>
#include "../ParsedText.h"
#include "../blocks/TextBlock.h"
class Page;
class GfxRenderer;
#define MAX_WORD_SIZE 200
class ChapterHtmlSlimParser {
const std::string& filepath;
GfxRenderer& renderer;
std::function<void(std::unique_ptr<Page>)> completePageFn;
std::function<void(int)> progressFn; // Progress callback (0-100)
int depth = 0;
int skipUntilDepth = INT_MAX;
int boldUntilDepth = INT_MAX;
int italicUntilDepth = INT_MAX;
// buffer for building up words from characters, will auto break if longer than this
// leave one char at end for null pointer
char partWordBuffer[MAX_WORD_SIZE + 1] = {};
int partWordBufferIndex = 0;
std::unique_ptr<ParsedText> currentTextBlock = nullptr;
std::unique_ptr<Page> currentPage = nullptr;
int16_t currentPageNextY = 0;
int fontId;
float lineCompression;
bool extraParagraphSpacing;
uint8_t paragraphAlignment;
uint16_t viewportWidth;
uint16_t viewportHeight;
bool hyphenationEnabled;
void startNewTextBlock(TextBlock::Style style);
void makePages();
// XML callbacks
static void XMLCALL startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void XMLCALL characterData(void* userData, const XML_Char* s, int len);
static void XMLCALL endElement(void* userData, const XML_Char* name);
public:
explicit ChapterHtmlSlimParser(const std::string& filepath, GfxRenderer& renderer, const int fontId,
const float lineCompression, const bool extraParagraphSpacing,
const uint8_t paragraphAlignment, const uint16_t viewportWidth,
const uint16_t viewportHeight, const bool hyphenationEnabled,
const std::function<void(std::unique_ptr<Page>)>& completePageFn,
const std::function<void(int)>& progressFn = nullptr)
: filepath(filepath),
renderer(renderer),
fontId(fontId),
lineCompression(lineCompression),
extraParagraphSpacing(extraParagraphSpacing),
paragraphAlignment(paragraphAlignment),
viewportWidth(viewportWidth),
viewportHeight(viewportHeight),
hyphenationEnabled(hyphenationEnabled),
completePageFn(completePageFn),
progressFn(progressFn) {}
~ChapterHtmlSlimParser() = default;
bool parseAndBuildPages();
void addLineToPage(std::shared_ptr<TextBlock> line);
};