mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-04 14:47:37 +03:00
## Summary * **What is the goal of this PR?** Add Spanish language hyphenation support to improve text rendering for Spanish books. * **What changes are included?** - Added Spanish hyphenation trie (`hyph-es.trie.h`) generated from Typst's hypher patterns - Registered `spanishHyphenator` in `LanguageRegistry.cpp` for language tag `es` - Added Spanish to the hyphenation evaluation test suite - Added a Spanish test data file with 5000 test cases ## Additional Context * **Test Results:** Spanish hyphenation achieves 99.02% F1 Score (97.72% perfect matches out of 5000 test cases) * **Compatibility:** Works automatically for EPUBs with `<dc:language>es</dc:language>` (or es-ES, es-MX, etc.) <img width="115" height="189" alt="imagen" src="https://github.com/user-attachments/assets/9b92e7fc-b98d-48af-8d53-dfdc2e68abee" /> | Metric | Value | |--------|-------| | Perfect matches | 97.72% | | Overall Precision | 99.33% | | Overall Recall | 99.42% | | Overall F1 Score | 99.38% | --- ### AI Usage Did you use AI tools to help write this code? _**PARTIALLY**_ AI assisted with: - Guidance and getting the code to compile - Preparing the PR description
46 lines
1.8 KiB
C++
46 lines
1.8 KiB
C++
#include "LanguageRegistry.h"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
|
|
#include "HyphenationCommon.h"
|
|
#include "generated/hyph-de.trie.h"
|
|
#include "generated/hyph-en.trie.h"
|
|
#include "generated/hyph-es.trie.h"
|
|
#include "generated/hyph-fr.trie.h"
|
|
#include "generated/hyph-ru.trie.h"
|
|
|
|
namespace {
|
|
|
|
// English hyphenation patterns (3/3 minimum prefix/suffix length)
|
|
LanguageHyphenator englishHyphenator(en_us_patterns, isLatinLetter, toLowerLatin, 3, 3);
|
|
LanguageHyphenator frenchHyphenator(fr_patterns, isLatinLetter, toLowerLatin);
|
|
LanguageHyphenator germanHyphenator(de_patterns, isLatinLetter, toLowerLatin);
|
|
LanguageHyphenator russianHyphenator(ru_ru_patterns, isCyrillicLetter, toLowerCyrillic);
|
|
LanguageHyphenator spanishHyphenator(es_patterns, isLatinLetter, toLowerLatin);
|
|
|
|
using EntryArray = std::array<LanguageEntry, 5>;
|
|
|
|
const EntryArray& entries() {
|
|
static const EntryArray kEntries = {{{"english", "en", &englishHyphenator},
|
|
{"french", "fr", &frenchHyphenator},
|
|
{"german", "de", &germanHyphenator},
|
|
{"russian", "ru", &russianHyphenator},
|
|
{"spanish", "es", &spanishHyphenator}}};
|
|
return kEntries;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Looks up the hyphenator registered under `primaryTag`.
//
// Walks the registry in order and returns the hyphenator of the first entry
// whose primaryTag compares equal (operator==) to the argument. No case
// folding or subtag stripping happens here, so the caller is expected to pass
// an already-normalized tag. Returns nullptr when no entry matches.
const LanguageHyphenator* getLanguageHyphenatorForPrimaryTag(const std::string& primaryTag) {
  for (const LanguageEntry& candidate : entries()) {
    if (primaryTag == candidate.primaryTag) {
      return candidate.hyphenator;
    }
  }
  return nullptr;
}
|
|
|
|
// Exposes the whole registry as a LanguageEntryView built from a pointer to
// the first entry and the number of entries. The view refers to the static
// table returned by entries(); nothing is copied here.
LanguageEntryView getLanguageEntries() {
  const EntryArray& registry = entries();
  const auto* firstEntry = registry.data();
  return LanguageEntryView{firstEntry, registry.size()};
}
|