Xteink-X4-crosspoint-reader/lib/Epub/Epub/hyphenation/LanguageRegistry.cpp
Arthur Tazhitdinov 8824c87490
feat: dict based Hyphenation (#305)
## Summary

* Adds (optional) Hyphenation for English, French, German, Russian
languages

## Additional Context

* Included hyphenation dictionaries add approximately 280kb to the flash
usage (German alone takes 200kb)
* Trie encoded dictionaries are adopted from hypher project
(https://github.com/typst/hypher)
* Soft hyphens (and other explicit hyphens) take precedence over
dict-based hyphenation. Overall, the hyphenation rules are quite
aggressive, as I believe it makes more sense on our smaller screen.

---------

Co-authored-by: Dave Allie <dave@daveallie.com>
2026-01-19 12:56:26 +00:00

43 lines
1.6 KiB
C++

#include "LanguageRegistry.h"
#include <algorithm>
#include <array>
#include "HyphenationCommon.h"
#include "generated/hyph-de.trie.h"
#include "generated/hyph-en.trie.h"
#include "generated/hyph-fr.trie.h"
#include "generated/hyph-ru.trie.h"
namespace {
// English hyphenation patterns (3/3 minimum prefix/suffix length)
LanguageHyphenator englishHyphenator(en_us_patterns, isLatinLetter, toLowerLatin, 3, 3);
LanguageHyphenator frenchHyphenator(fr_patterns, isLatinLetter, toLowerLatin);
LanguageHyphenator germanHyphenator(de_patterns, isLatinLetter, toLowerLatin);
LanguageHyphenator russianHyphenator(ru_ru_patterns, isCyrillicLetter, toLowerCyrillic);
using EntryArray = std::array<LanguageEntry, 4>;
const EntryArray& entries() {
static const EntryArray kEntries = {{{"english", "en", &englishHyphenator},
{"french", "fr", &frenchHyphenator},
{"german", "de", &germanHyphenator},
{"russian", "ru", &russianHyphenator}}};
return kEntries;
}
} // namespace
const LanguageHyphenator* getLanguageHyphenatorForPrimaryTag(const std::string& primaryTag) {
const auto& allEntries = entries();
const auto it = std::find_if(allEntries.begin(), allEntries.end(),
[&primaryTag](const LanguageEntry& entry) { return primaryTag == entry.primaryTag; });
return (it != allEntries.end()) ? it->hyphenator : nullptr;
}
LanguageEntryView getLanguageEntries() {
const auto& allEntries = entries();
return LanguageEntryView{allEntries.data(), allEntries.size()};
}