Xteink-X4-crosspoint-reader/lib/Epub/Epub/parsers/ContentOpfParser.h
Arthur Tazhitdinov 8824c87490
feat: dict based Hyphenation (#305)
## Summary

* Adds (optional) Hyphenation for English, French, German, Russian
languages

## Additional Context

* Included hyphenation dictionaries add approximately 280kb to the flash
usage (German alone takes 200kb)
* Trie encoded dictionaries are adopted from hypher project
(https://github.com/typst/hypher)
* Soft hyphens (and other explicit hyphens) take precedence over
dict-based hyphenation. Overall, the hyphenation rules are quite
aggressive, as I believe it makes more sense on our smaller screen.

---------

Co-authored-by: Dave Allie <dave@daveallie.com>
2026-01-19 12:56:26 +00:00

54 lines
1.4 KiB
C++

#pragma once
#include <Print.h>
#include "Epub.h"
#include "expat.h"
class BookMetadataCache;
class ContentOpfParser final : public Print {
enum ParserState {
START,
IN_PACKAGE,
IN_METADATA,
IN_BOOK_TITLE,
IN_BOOK_AUTHOR,
IN_BOOK_LANGUAGE,
IN_MANIFEST,
IN_SPINE,
IN_GUIDE,
};
const std::string& cachePath;
const std::string& baseContentPath;
size_t remainingSize;
XML_Parser parser = nullptr;
ParserState state = START;
BookMetadataCache* cache;
FsFile tempItemStore;
std::string coverItemId;
static void startElement(void* userData, const XML_Char* name, const XML_Char** atts);
static void characterData(void* userData, const XML_Char* s, int len);
static void endElement(void* userData, const XML_Char* name);
public:
std::string title;
std::string author;
std::string language;
std::string tocNcxPath;
std::string tocNavPath; // EPUB 3 nav document path
std::string coverItemHref;
std::string textReferenceHref;
explicit ContentOpfParser(const std::string& cachePath, const std::string& baseContentPath, const size_t xmlSize,
BookMetadataCache* cache)
: cachePath(cachePath), baseContentPath(baseContentPath), remainingSize(xmlSize), cache(cache) {}
~ContentOpfParser() override;
bool setup();
size_t write(uint8_t) override;
size_t write(const uint8_t* buffer, size_t size) override;
};