mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-06 07:37:37 +03:00
switch to trie packed liang hyphenation dictionaries
This commit is contained in:
parent
c83fd37286
commit
0b3e029484
2
.gitignore
vendored
2
.gitignore
vendored
@ -4,3 +4,5 @@
|
||||
.vscode
|
||||
lib/EpdFont/fontsrc
|
||||
*.generated.h
|
||||
build
|
||||
**/__pycache__/
|
||||
@ -78,12 +78,6 @@ bool Epub::parseContentOpf(BookMetadataCache::BookMetadata& bookMetadata) {
|
||||
bookMetadata.coverItemHref = opfParser.coverItemHref;
|
||||
bookMetadata.textReferenceHref = opfParser.textReferenceHref;
|
||||
|
||||
if (!bookMetadata.language.empty()) {
|
||||
Serial.printf("[%lu] [EBP] OPF language: %s\n", millis(), bookMetadata.language.c_str());
|
||||
} else {
|
||||
Serial.printf("[%lu] [EBP] OPF language: <none>\n", millis());
|
||||
}
|
||||
|
||||
if (!opfParser.tocNcxPath.empty()) {
|
||||
tocNcxItem = opfParser.tocNcxPath;
|
||||
}
|
||||
|
||||
@ -188,7 +188,6 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
|
||||
[this, &lut](std::unique_ptr<Page> page) { lut.emplace_back(this->onPageComplete(std::move(page))); },
|
||||
progressFn);
|
||||
Hyphenator::setPreferredLanguage(epub->getLanguage());
|
||||
Serial.printf("[%lu] [SCT] Hyphenation language set to: %s\n", millis(), epub->getLanguage().c_str());
|
||||
success = visitor.parseAndBuildPages();
|
||||
|
||||
SdMan.remove(tmpHtmlPath.c_str());
|
||||
|
||||
@ -1,343 +1,9 @@
|
||||
#include "EnglishHyphenator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "HyphenationLiterals.h"
|
||||
|
||||
namespace {
|
||||
|
||||
char lowerLatinChar(const uint32_t cp) {
|
||||
if (!isLatinLetter(cp)) {
|
||||
return 0;
|
||||
}
|
||||
return static_cast<char>(toLowerLatin(cp));
|
||||
}
|
||||
|
||||
bool isEnglishApproximantChar(const char c) { return c == 'l' || c == 'r' || c == 'w' || c == 'y'; }
|
||||
|
||||
bool isEnglishStopChar(const char c) {
|
||||
switch (c) {
|
||||
case 'p':
|
||||
case 'b':
|
||||
case 't':
|
||||
case 'd':
|
||||
case 'k':
|
||||
case 'g':
|
||||
case 'c':
|
||||
case 'q':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool isEnglishFricativeChar(const char c) {
|
||||
switch (c) {
|
||||
case 'f':
|
||||
case 'v':
|
||||
case 's':
|
||||
case 'z':
|
||||
case 'h':
|
||||
case 'x':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
using LatinLiteral = HyphenLiteralT<char>;
|
||||
|
||||
constexpr std::array<LatinLiteral, 20> ENGLISH_PREFIXES = {
|
||||
{{"anti", 4}, {"auto", 4}, {"counter", 7}, {"de", 2}, {"dis", 3}, {"hyper", 5}, {"inter", 5},
|
||||
{"micro", 5}, {"mis", 3}, {"mono", 4}, {"multi", 5}, {"non", 3}, {"over", 4}, {"post", 4},
|
||||
{"pre", 3}, {"pro", 3}, {"re", 2}, {"sub", 3}, {"super", 5}, {"trans", 5}}};
|
||||
|
||||
constexpr std::array<LatinLiteral, 24> ENGLISH_SUFFIXES = {
|
||||
{{"able", 4}, {"ible", 4}, {"ing", 3}, {"ings", 4}, {"ed", 2}, {"er", 2}, {"ers", 3}, {"est", 3},
|
||||
{"ful", 3}, {"hood", 4}, {"less", 4}, {"lessly", 6}, {"ly", 2}, {"ment", 4}, {"ments", 5}, {"ness", 4},
|
||||
{"ous", 3}, {"tion", 4}, {"sion", 4}, {"ward", 4}, {"wards", 5}, {"ship", 4}, {"ships", 5}, {"y", 1}}};
|
||||
|
||||
bool nextToApostrophe(const std::vector<CodepointInfo>& cps, size_t index);
|
||||
|
||||
std::string lowercaseLatinWord(const std::vector<CodepointInfo>& cps) {
|
||||
std::string lower;
|
||||
lower.reserve(cps.size());
|
||||
for (const auto& info : cps) {
|
||||
lower.push_back(lowerLatinChar(info.value));
|
||||
}
|
||||
return lower;
|
||||
}
|
||||
|
||||
bool englishSegmentHasVowel(const std::vector<CodepointInfo>& cps, const size_t start, const size_t end) {
|
||||
if (start >= end || start >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
const size_t clampedEnd = std::min(end, cps.size());
|
||||
for (size_t i = start; i < clampedEnd; ++i) {
|
||||
if (isLatinVowel(cps[i].value)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool englishBreakAllowed(const std::vector<CodepointInfo>& cps, const size_t breakIndex) {
|
||||
if (breakIndex == 0 || breakIndex >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t prefixLen = breakIndex;
|
||||
const size_t suffixLen = cps.size() - breakIndex;
|
||||
if (prefixLen < MIN_PREFIX_CP || suffixLen < MIN_SUFFIX_CP) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!englishSegmentHasVowel(cps, 0, breakIndex) || !englishSegmentHasVowel(cps, breakIndex, cps.size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nextToApostrophe(cps, breakIndex)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void appendMorphologyBreaks(const std::vector<CodepointInfo>& cps, const std::string& lowerWord,
|
||||
std::vector<size_t>& indexes) {
|
||||
appendLiteralBreaks(
|
||||
lowerWord, ENGLISH_PREFIXES, ENGLISH_SUFFIXES,
|
||||
[&](const size_t breakIndex) { return englishBreakAllowed(cps, breakIndex); }, indexes);
|
||||
}
|
||||
|
||||
struct CharPair {
|
||||
char first;
|
||||
char second;
|
||||
};
|
||||
|
||||
bool matchesDigraph(const char first, const char second, const std::initializer_list<CharPair>& pairs) {
|
||||
for (const auto& pair : pairs) {
|
||||
if (pair.first == first && pair.second == second) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isEnglishDiphthong(const uint32_t first, const uint32_t second) {
|
||||
if (!isLatinLetter(first) || !isLatinLetter(second)) {
|
||||
return false;
|
||||
}
|
||||
const auto f = static_cast<char>(toLowerLatin(first));
|
||||
const auto s = static_cast<char>(toLowerLatin(second));
|
||||
switch (f) {
|
||||
case 'a':
|
||||
return s == 'i' || s == 'y' || s == 'u';
|
||||
case 'e':
|
||||
return s == 'a' || s == 'e' || s == 'i' || s == 'o' || s == 'u' || s == 'y';
|
||||
case 'i':
|
||||
return s == 'e' || s == 'u' || s == 'a';
|
||||
case 'o':
|
||||
return s == 'a' || s == 'e' || s == 'i' || s == 'o' || s == 'u' || s == 'y';
|
||||
case 'u':
|
||||
return s == 'i' || s == 'a' || s == 'e';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isValidEnglishOnsetBigram(const uint32_t firstCp, const uint32_t secondCp) {
|
||||
const char first = lowerLatinChar(firstCp);
|
||||
const char second = lowerLatinChar(secondCp);
|
||||
if (!first || !second) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (matchesDigraph(first, second,
|
||||
{{'c', 'h'},
|
||||
{'s', 'h'},
|
||||
{'t', 'h'},
|
||||
{'p', 'h'},
|
||||
{'w', 'h'},
|
||||
{'w', 'r'},
|
||||
{'k', 'n'},
|
||||
{'g', 'n'},
|
||||
{'p', 's'},
|
||||
{'p', 't'},
|
||||
{'p', 'n'},
|
||||
{'r', 'h'}})) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isEnglishStopChar(first) && isEnglishApproximantChar(second)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isEnglishFricativeChar(first) && isEnglishApproximantChar(second)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (first == 's' && (second == 'p' || second == 't' || second == 'k' || second == 'm' || second == 'n' ||
|
||||
second == 'f' || second == 'l' || second == 'w' || second == 'c')) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (second == 'y' && (first == 'p' || first == 'b' || first == 't' || first == 'd' || first == 'f' || first == 'k' ||
|
||||
first == 'g' || first == 'h' || first == 'm' || first == 'n' || first == 'l' || first == 's')) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isValidEnglishOnsetTrigram(const uint32_t firstCp, const uint32_t secondCp, const uint32_t thirdCp) {
|
||||
const char first = lowerLatinChar(firstCp);
|
||||
const char second = lowerLatinChar(secondCp);
|
||||
const char third = lowerLatinChar(thirdCp);
|
||||
if (!first || !second || !third) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (first == 's') {
|
||||
if (second == 'p' && (third == 'l' || third == 'r' || third == 'w')) {
|
||||
return true;
|
||||
}
|
||||
if (second == 't' && (third == 'r' || third == 'w' || third == 'y')) {
|
||||
return true;
|
||||
}
|
||||
if (second == 'k' && (third == 'l' || third == 'r' || third == 'w')) {
|
||||
return true;
|
||||
}
|
||||
if (second == 'c' && (third == 'l' || third == 'r')) {
|
||||
return true;
|
||||
}
|
||||
if (second == 'f' && third == 'r') {
|
||||
return true;
|
||||
}
|
||||
if (second == 'h' && third == 'r') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (first == 't' && second == 'h' && third == 'r') {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verifies that the consonant cluster could begin an English syllable.
|
||||
bool englishClusterIsValidOnset(const std::vector<CodepointInfo>& cps, const size_t start, const size_t end) {
|
||||
if (start >= end) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
const char ch = lowerLatinChar(cps[i].value);
|
||||
if (!ch) {
|
||||
return false;
|
||||
}
|
||||
if (!isLatinConsonant(cps[i].value) && ch != 'y') {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t len = end - start;
|
||||
if (len == 1) {
|
||||
return true;
|
||||
}
|
||||
if (len == 2) {
|
||||
return isValidEnglishOnsetBigram(cps[start].value, cps[start + 1].value);
|
||||
}
|
||||
if (len == 3) {
|
||||
return isValidEnglishOnsetTrigram(cps[start].value, cps[start + 1].value, cps[start + 2].value);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Picks the longest legal onset inside the consonant cluster between vowels.
|
||||
size_t englishOnsetLength(const std::vector<CodepointInfo>& cps, const size_t clusterStart, const size_t clusterEnd) {
|
||||
const size_t clusterLen = clusterEnd - clusterStart;
|
||||
if (clusterLen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const size_t maxLen = std::min<size_t>(3, clusterLen);
|
||||
for (size_t len = maxLen; len >= 1; --len) {
|
||||
const size_t suffixStart = clusterEnd - len;
|
||||
if (englishClusterIsValidOnset(cps, suffixStart, clusterEnd)) {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Avoids creating hyphen positions adjacent to apostrophes (e.g., contractions).
|
||||
bool nextToApostrophe(const std::vector<CodepointInfo>& cps, const size_t index) {
|
||||
if (index == 0 || index >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
const auto left = cps[index - 1].value;
|
||||
const auto right = cps[index].value;
|
||||
return left == '\'' || right == '\'';
|
||||
}
|
||||
|
||||
// Returns byte indexes where the word may break according to English syllable rules.
|
||||
std::vector<size_t> englishBreakIndexes(const std::vector<CodepointInfo>& cps) {
|
||||
std::vector<size_t> indexes;
|
||||
const size_t wordSize = cps.size();
|
||||
|
||||
std::vector<size_t> vowelPositions;
|
||||
vowelPositions.reserve(wordSize / 2);
|
||||
for (size_t i = 0; i < wordSize; ++i) {
|
||||
if (isLatinVowel(cps[i].value)) {
|
||||
vowelPositions.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (vowelPositions.size() < 2) {
|
||||
return indexes;
|
||||
}
|
||||
|
||||
for (size_t v = 0; v + 1 < vowelPositions.size(); ++v) {
|
||||
const size_t leftVowel = vowelPositions[v];
|
||||
const size_t rightVowel = vowelPositions[v + 1];
|
||||
|
||||
if (rightVowel - leftVowel == 1) {
|
||||
if (!isEnglishDiphthong(cps[leftVowel].value, cps[rightVowel].value) && englishBreakAllowed(cps, rightVowel)) {
|
||||
indexes.push_back(rightVowel);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t clusterStart = leftVowel + 1;
|
||||
const size_t clusterEnd = rightVowel;
|
||||
const size_t onsetLen = englishOnsetLength(cps, clusterStart, clusterEnd);
|
||||
const size_t breakIndex = clusterEnd - onsetLen;
|
||||
|
||||
if (!englishBreakAllowed(cps, breakIndex)) {
|
||||
continue;
|
||||
}
|
||||
indexes.push_back(breakIndex);
|
||||
}
|
||||
|
||||
const auto lowerWord = lowercaseLatinWord(cps);
|
||||
const size_t preDedupeCount = indexes.size();
|
||||
appendMorphologyBreaks(cps, lowerWord, indexes);
|
||||
|
||||
if (indexes.size() > preDedupeCount) {
|
||||
std::sort(indexes.begin(), indexes.end());
|
||||
indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
|
||||
}
|
||||
return indexes;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#include "LiangHyphenation.h"
|
||||
#include "generated/hyph-en-us.trie.h"
|
||||
|
||||
const EnglishHyphenator& EnglishHyphenator::instance() {
|
||||
static EnglishHyphenator instance;
|
||||
@ -345,5 +11,8 @@ const EnglishHyphenator& EnglishHyphenator::instance() {
|
||||
}
|
||||
|
||||
std::vector<size_t> EnglishHyphenator::breakIndexes(const std::vector<CodepointInfo>& cps) const {
|
||||
return englishBreakIndexes(cps);
|
||||
// The shared Liang engine needs to know which letters are valid, how to lowercase them, and what
|
||||
// TeX-style prefix/suffix minima to respect (currently set to lefthyphenmin=2 and righthyphenmin=2)
|
||||
const LiangWordConfig config(isLatinLetter, toLowerLatin, minPrefix(), minSuffix());
|
||||
return liangBreakIndexes(cps, en_us_patterns, config);
|
||||
}
|
||||
|
||||
@ -8,6 +8,9 @@ class EnglishHyphenator final : public LanguageHyphenator {
|
||||
static const EnglishHyphenator& instance();
|
||||
|
||||
std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const override;
|
||||
// Keep both minima at two characters to mirror Pyphen defaults.
|
||||
size_t minPrefix() const override { return 2; }
|
||||
size_t minSuffix() const override { return 2; }
|
||||
|
||||
private:
|
||||
EnglishHyphenator() = default;
|
||||
|
||||
14
lib/Epub/Epub/hyphenation/GermanHyphenator.cpp
Normal file
14
lib/Epub/Epub/hyphenation/GermanHyphenator.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include "GermanHyphenator.h"
|
||||
|
||||
#include "LiangHyphenation.h"
|
||||
#include "generated/hyph-de.trie.h"
|
||||
|
||||
const GermanHyphenator& GermanHyphenator::instance() {
|
||||
static GermanHyphenator instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
std::vector<size_t> GermanHyphenator::breakIndexes(const std::vector<CodepointInfo>& cps) const {
|
||||
const LiangWordConfig config(isLatinLetter, toLowerLatin, minPrefix(), minSuffix());
|
||||
return liangBreakIndexes(cps, de_patterns, config);
|
||||
}
|
||||
14
lib/Epub/Epub/hyphenation/GermanHyphenator.h
Normal file
14
lib/Epub/Epub/hyphenation/GermanHyphenator.h
Normal file
@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHyphenator.h"
|
||||
|
||||
// Implements Liang hyphenation rules for German (Latin script).
|
||||
class GermanHyphenator final : public LanguageHyphenator {
|
||||
public:
|
||||
static const GermanHyphenator& instance();
|
||||
|
||||
std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const override;
|
||||
|
||||
private:
|
||||
GermanHyphenator() = default;
|
||||
};
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
namespace {
|
||||
|
||||
// Convert Latin uppercase letters (A-Z) to lowercase (a-z)
|
||||
uint32_t toLowerLatinImpl(const uint32_t cp) {
|
||||
if (cp >= 'A' && cp <= 'Z') {
|
||||
return cp - 'A' + 'a';
|
||||
@ -9,6 +10,9 @@ uint32_t toLowerLatinImpl(const uint32_t cp) {
|
||||
return cp;
|
||||
}
|
||||
|
||||
// Convert Cyrillic uppercase letters to lowercase
|
||||
// Cyrillic uppercase range 0x0410-0x042F maps to lowercase by adding 0x20
|
||||
// Special case: Cyrillic capital IO (0x0401) maps to lowercase io (0x0451)
|
||||
uint32_t toLowerCyrillicImpl(const uint32_t cp) {
|
||||
if (cp >= 0x0410 && cp <= 0x042F) {
|
||||
return cp + 0x20;
|
||||
@ -27,36 +31,8 @@ uint32_t toLowerCyrillic(const uint32_t cp) { return toLowerCyrillicImpl(cp); }
|
||||
|
||||
bool isLatinLetter(const uint32_t cp) { return (cp >= 'A' && cp <= 'Z') || (cp >= 'a' && cp <= 'z'); }
|
||||
|
||||
bool isLatinVowel(uint32_t cp) {
|
||||
cp = toLowerLatinImpl(cp);
|
||||
return cp == 'a' || cp == 'e' || cp == 'i' || cp == 'o' || cp == 'u' || cp == 'y';
|
||||
}
|
||||
|
||||
bool isLatinConsonant(const uint32_t cp) { return isLatinLetter(cp) && !isLatinVowel(cp); }
|
||||
|
||||
bool isCyrillicLetter(const uint32_t cp) { return (cp >= 0x0400 && cp <= 0x052F); }
|
||||
|
||||
bool isCyrillicVowel(uint32_t cp) {
|
||||
cp = toLowerCyrillicImpl(cp);
|
||||
switch (cp) {
|
||||
case 0x0430: // а
|
||||
case 0x0435: // е
|
||||
case 0x0451: // ё
|
||||
case 0x0438: // и
|
||||
case 0x043E: // о
|
||||
case 0x0443: // у
|
||||
case 0x044B: // ы
|
||||
case 0x044D: // э
|
||||
case 0x044E: // ю
|
||||
case 0x044F: // я
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool isCyrillicConsonant(const uint32_t cp) { return isCyrillicLetter(cp) && !isCyrillicVowel(cp); }
|
||||
|
||||
bool isAlphabetic(const uint32_t cp) { return isLatinLetter(cp) || isCyrillicLetter(cp); }
|
||||
|
||||
bool isPunctuation(const uint32_t cp) {
|
||||
|
||||
@ -9,20 +9,11 @@ struct CodepointInfo {
|
||||
size_t byteOffset;
|
||||
};
|
||||
|
||||
// Minimum number of codepoints required in prefix and suffix for hyphenation.
|
||||
constexpr size_t MIN_PREFIX_CP = 2;
|
||||
constexpr size_t MIN_SUFFIX_CP = 2;
|
||||
|
||||
uint32_t toLowerLatin(uint32_t cp);
|
||||
uint32_t toLowerCyrillic(uint32_t cp);
|
||||
|
||||
bool isLatinLetter(uint32_t cp);
|
||||
bool isLatinVowel(uint32_t cp);
|
||||
bool isLatinConsonant(uint32_t cp);
|
||||
|
||||
bool isCyrillicLetter(uint32_t cp);
|
||||
bool isCyrillicVowel(uint32_t cp);
|
||||
bool isCyrillicConsonant(uint32_t cp);
|
||||
|
||||
bool isAlphabetic(uint32_t cp);
|
||||
bool isPunctuation(uint32_t cp);
|
||||
|
||||
@ -1,63 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
template <typename T>
|
||||
struct HyphenLiteral {
|
||||
const T* data;
|
||||
size_t length;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using HyphenLiteralT = HyphenLiteral<T>;
|
||||
|
||||
template <typename WordContainer, typename Literal>
|
||||
bool matchesLiteralAt(const WordContainer& word, const size_t start, const Literal& literal) {
|
||||
if (!literal.data || literal.length == 0) {
|
||||
return false;
|
||||
}
|
||||
if (start + literal.length > word.size()) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < literal.length; ++i) {
|
||||
if (word[start + i] != literal.data[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename WordContainer, typename PrefixContainer, typename SuffixContainer, typename BreakAllowedFn>
|
||||
void appendLiteralBreaks(const WordContainer& lowerWord, const PrefixContainer& prefixes,
|
||||
const SuffixContainer& suffixes, BreakAllowedFn&& breakAllowed, std::vector<size_t>& indexes) {
|
||||
const size_t length = lowerWord.size();
|
||||
|
||||
const auto tryPush = [&](const size_t breakIndex) {
|
||||
if (!breakAllowed(breakIndex)) {
|
||||
return;
|
||||
}
|
||||
indexes.push_back(breakIndex);
|
||||
};
|
||||
|
||||
for (const auto& literal : prefixes) {
|
||||
if (literal.length == 0 || literal.length >= length) {
|
||||
continue;
|
||||
}
|
||||
if (!matchesLiteralAt(lowerWord, 0, literal)) {
|
||||
continue;
|
||||
}
|
||||
tryPush(literal.length);
|
||||
}
|
||||
|
||||
for (const auto& literal : suffixes) {
|
||||
if (literal.length == 0 || literal.length >= length) {
|
||||
continue;
|
||||
}
|
||||
const size_t breakIndex = length - literal.length;
|
||||
if (!matchesLiteralAt(lowerWord, breakIndex, literal)) {
|
||||
continue;
|
||||
}
|
||||
tryPush(breakIndex);
|
||||
}
|
||||
}
|
||||
@ -6,6 +6,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "EnglishHyphenator.h"
|
||||
#include "GermanHyphenator.h"
|
||||
#include "HyphenationCommon.h"
|
||||
#include "LanguageHyphenator.h"
|
||||
#include "RussianHyphenator.h"
|
||||
@ -27,6 +28,7 @@ const LanguageHyphenator* hyphenatorForLanguage(const std::string& langTag) {
|
||||
if (primary.empty()) return nullptr;
|
||||
|
||||
if (primary == "en") return &EnglishHyphenator::instance();
|
||||
if (primary == "de") return &GermanHyphenator::instance();
|
||||
if (primary == "ru") return &RussianHyphenator::instance();
|
||||
return nullptr;
|
||||
}
|
||||
@ -78,8 +80,8 @@ void trimTrailingFootnoteReference(std::vector<CodepointInfo>& cps) {
|
||||
}
|
||||
|
||||
// Asks the language hyphenator for legal break positions inside the word.
|
||||
std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps) {
|
||||
if (const auto* hyphenator = cachedHyphenator()) {
|
||||
std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps, const LanguageHyphenator* hyphenator) {
|
||||
if (hyphenator) {
|
||||
return hyphenator->breakIndexes(cps);
|
||||
}
|
||||
return {};
|
||||
@ -140,7 +142,10 @@ std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& w
|
||||
auto cps = collectCodepoints(word);
|
||||
trimSurroundingPunctuation(cps);
|
||||
trimTrailingFootnoteReference(cps);
|
||||
if (cps.size() < MIN_PREFIX_CP + MIN_SUFFIX_CP) {
|
||||
const auto* hyphenator = cachedHyphenator();
|
||||
const size_t minPrefix = hyphenator ? hyphenator->minPrefix() : LanguageHyphenator::kDefaultMinPrefix;
|
||||
const size_t minSuffix = hyphenator ? hyphenator->minSuffix() : LanguageHyphenator::kDefaultMinSuffix;
|
||||
if (cps.size() < minPrefix + minSuffix) {
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -151,11 +156,11 @@ std::vector<Hyphenator::BreakInfo> Hyphenator::breakOffsets(const std::string& w
|
||||
}
|
||||
|
||||
// Ask language hyphenator for legal break points.
|
||||
std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps) : std::vector<size_t>();
|
||||
std::vector<size_t> indexes = hasOnlyAlphabetic(cps) ? collectBreakIndexes(cps, hyphenator) : std::vector<size_t>();
|
||||
|
||||
// Only add fallback breaks if needed and deduplicate if both language and fallback breaks exist.
|
||||
if (includeFallback) {
|
||||
for (size_t idx = MIN_PREFIX_CP; idx + MIN_SUFFIX_CP <= cps.size(); ++idx) {
|
||||
for (size_t idx = minPrefix; idx + minSuffix <= cps.size(); ++idx) {
|
||||
indexes.push_back(idx);
|
||||
}
|
||||
// Only deduplicate if we have both language-specific and fallback breaks.
|
||||
|
||||
@ -1,11 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include "HyphenationCommon.h"
|
||||
|
||||
class LanguageHyphenator {
|
||||
public:
|
||||
static constexpr size_t kDefaultMinPrefix = 2;
|
||||
static constexpr size_t kDefaultMinSuffix = 2;
|
||||
|
||||
virtual ~LanguageHyphenator() = default;
|
||||
virtual std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const = 0;
|
||||
virtual size_t minPrefix() const { return kDefaultMinPrefix; }
|
||||
virtual size_t minSuffix() const { return kDefaultMinSuffix; }
|
||||
};
|
||||
|
||||
360
lib/Epub/Epub/hyphenation/LiangHyphenation.cpp
Normal file
360
lib/Epub/Epub/hyphenation/LiangHyphenation.cpp
Normal file
@ -0,0 +1,360 @@
|
||||
#include "LiangHyphenation.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
// Holds the dotted, lower-case representation used by Liang along with the original character order
|
||||
// so we can traverse via Unicode scalars instead of raw UTF-8 bytes.
|
||||
struct AugmentedWord {
|
||||
std::vector<uint32_t> chars;
|
||||
|
||||
bool empty() const { return chars.empty(); }
|
||||
size_t charCount() const { return chars.size(); }
|
||||
};
|
||||
|
||||
// Adds a single character to the augmented word.
|
||||
void appendCharToAugmentedWord(uint32_t cp, AugmentedWord& word) { word.chars.push_back(cp); }
|
||||
|
||||
// Produces the dotted ('.' + lowercase word + '.') UTF-8 byte stream required by Liang. Classic TeX
|
||||
// hyphenation logic prepends/appends '.' sentinels so that patterns like ".ab" may anchor to word
|
||||
// boundaries. If any character in the candidate word fails the `isLetter` predicate we abort early
|
||||
// and return an empty structure, signaling the caller to skip hyphenation entirely.
|
||||
AugmentedWord buildAugmentedWord(const std::vector<CodepointInfo>& cps, const LiangWordConfig& config) {
|
||||
AugmentedWord word;
|
||||
if (cps.empty()) {
|
||||
return word;
|
||||
}
|
||||
|
||||
word.chars.reserve(cps.size() + 2);
|
||||
|
||||
appendCharToAugmentedWord('.', word);
|
||||
for (const auto& info : cps) {
|
||||
if (!config.isLetter(info.value)) {
|
||||
word.chars.clear();
|
||||
return word;
|
||||
}
|
||||
appendCharToAugmentedWord(config.toLower(info.value), word);
|
||||
}
|
||||
appendCharToAugmentedWord('.', word);
|
||||
return word;
|
||||
}
|
||||
|
||||
// Compact header that prefixes every serialized trie blob and lets us locate
|
||||
// the individual sections without storing pointers in flash.
|
||||
struct SerializedTrieHeader {
|
||||
uint32_t letterCount;
|
||||
uint32_t nodeCount;
|
||||
uint32_t edgeCount;
|
||||
uint32_t valueBytes;
|
||||
};
|
||||
|
||||
constexpr size_t kNodeRecordSize = 7;
|
||||
constexpr uint32_t kNoValueOffset = 0x00FFFFFFu;
|
||||
|
||||
// Lightweight view over the packed blob emitted by the generator script.
|
||||
struct SerializedTrieView {
|
||||
const uint32_t* letters = nullptr;
|
||||
const uint8_t* nodes = nullptr;
|
||||
const uint8_t* edgeChildren = nullptr;
|
||||
const uint8_t* edgeLetters = nullptr;
|
||||
const uint8_t* values = nullptr;
|
||||
uint32_t letterCount = 0;
|
||||
uint32_t nodeCount = 0;
|
||||
uint32_t edgeCount = 0;
|
||||
uint32_t valueBytes = 0;
|
||||
size_t edgeLetterBytes = 0;
|
||||
|
||||
static constexpr size_t kInvalidNodeIndex = std::numeric_limits<size_t>::max();
|
||||
static constexpr uint32_t kInvalidLetterIndex = std::numeric_limits<uint32_t>::max();
|
||||
};
|
||||
|
||||
// Splits the raw byte array into typed slices. We purposely keep this logic
|
||||
// very defensive: any malformed blob results in an empty view so the caller can
|
||||
// bail out quietly.
|
||||
SerializedTrieView parseSerializedTrie(const SerializedHyphenationPatterns& patterns) {
|
||||
SerializedTrieView view;
|
||||
if (!patterns.data || patterns.size < sizeof(SerializedTrieHeader)) {
|
||||
return view;
|
||||
}
|
||||
|
||||
const auto* header = reinterpret_cast<const SerializedTrieHeader*>(patterns.data);
|
||||
const uint8_t* cursor = patterns.data + sizeof(SerializedTrieHeader);
|
||||
const uint8_t* end = patterns.data + patterns.size;
|
||||
|
||||
const auto requireBytes = [&](size_t bytes) {
|
||||
return bytes <= static_cast<size_t>(end - cursor);
|
||||
};
|
||||
|
||||
const size_t lettersBytes = static_cast<size_t>(header->letterCount) * sizeof(uint32_t);
|
||||
if (!requireBytes(lettersBytes)) {
|
||||
return SerializedTrieView{};
|
||||
}
|
||||
view.letters = reinterpret_cast<const uint32_t*>(cursor);
|
||||
cursor += lettersBytes;
|
||||
|
||||
const size_t nodesBytes = static_cast<size_t>(header->nodeCount) * kNodeRecordSize;
|
||||
if (!requireBytes(nodesBytes)) {
|
||||
return SerializedTrieView{};
|
||||
}
|
||||
view.nodes = cursor;
|
||||
cursor += nodesBytes;
|
||||
|
||||
const size_t childBytes = static_cast<size_t>(header->edgeCount) * sizeof(uint16_t);
|
||||
if (!requireBytes(childBytes)) {
|
||||
return SerializedTrieView{};
|
||||
}
|
||||
view.edgeChildren = cursor;
|
||||
cursor += childBytes;
|
||||
|
||||
const size_t letterBits = static_cast<size_t>(header->edgeCount) * 6;
|
||||
const size_t letterBytes = (letterBits + 7) >> 3;
|
||||
if (!requireBytes(letterBytes)) {
|
||||
return SerializedTrieView{};
|
||||
}
|
||||
view.edgeLetters = cursor;
|
||||
view.edgeLetterBytes = letterBytes;
|
||||
cursor += letterBytes;
|
||||
|
||||
if (!requireBytes(header->valueBytes)) {
|
||||
return SerializedTrieView{};
|
||||
}
|
||||
view.values = cursor;
|
||||
view.valueBytes = header->valueBytes;
|
||||
|
||||
view.letterCount = header->letterCount;
|
||||
view.nodeCount = header->nodeCount;
|
||||
view.edgeCount = header->edgeCount;
|
||||
return view;
|
||||
}
|
||||
|
||||
// The serialized blobs live in PROGMEM, so parsing them repeatedly is cheap but
|
||||
// wasteful. Keep a tiny cache indexed by the descriptor address so every
|
||||
// language builds its view only once.
|
||||
const SerializedTrieView& getSerializedTrie(const SerializedHyphenationPatterns& patterns) {
|
||||
struct CacheEntry {
|
||||
const SerializedHyphenationPatterns* key;
|
||||
SerializedTrieView view;
|
||||
};
|
||||
static std::vector<CacheEntry> cache;
|
||||
|
||||
for (const auto& entry : cache) {
|
||||
if (entry.key == &patterns) {
|
||||
return entry.view;
|
||||
}
|
||||
}
|
||||
|
||||
cache.push_back({&patterns, parseSerializedTrie(patterns)});
|
||||
return cache.back().view;
|
||||
}
|
||||
|
||||
uint16_t readUint16LE(const uint8_t* ptr) {
|
||||
return static_cast<uint16_t>(ptr[0]) | static_cast<uint16_t>(static_cast<uint16_t>(ptr[1]) << 8);
|
||||
}
|
||||
|
||||
uint32_t readUint24LE(const uint8_t* ptr) {
|
||||
return static_cast<uint32_t>(ptr[0]) | (static_cast<uint32_t>(ptr[1]) << 8) |
|
||||
(static_cast<uint32_t>(ptr[2]) << 16);
|
||||
}
|
||||
|
||||
// Edges store child indexes and letter indexes in separate, compact arrays. We
|
||||
// read the child from the 16-bit table and decode the 6-bit letter from the
|
||||
// bitstream, which packs two entries per 12 bits on average.
|
||||
uint8_t readEdgeLetterIndex(const SerializedTrieView& trie, const size_t edgeIndex) {
|
||||
if (!trie.edgeLetters) {
|
||||
return 0xFFu;
|
||||
}
|
||||
const size_t bitOffset = edgeIndex * 6;
|
||||
const size_t byteOffset = bitOffset >> 3;
|
||||
if (byteOffset >= trie.edgeLetterBytes) {
|
||||
return 0xFFu;
|
||||
}
|
||||
const uint8_t bitShift = static_cast<uint8_t>(bitOffset & 0x07u);
|
||||
uint32_t chunk = trie.edgeLetters[byteOffset];
|
||||
if (byteOffset + 1 < trie.edgeLetterBytes) {
|
||||
chunk |= static_cast<uint32_t>(trie.edgeLetters[byteOffset + 1]) << 8;
|
||||
}
|
||||
const uint8_t value = static_cast<uint8_t>((chunk >> bitShift) & 0x3Fu);
|
||||
return value;
|
||||
}
|
||||
|
||||
// Materialized view of a node record so call sites do not repeatedly poke into
|
||||
// the byte array.
|
||||
struct NodeFields {
|
||||
uint16_t firstEdge;
|
||||
uint8_t childCount;
|
||||
uint32_t valueOffset;
|
||||
uint8_t valueLength;
|
||||
};
|
||||
|
||||
NodeFields loadNode(const SerializedTrieView& trie, const size_t nodeIndex) {
|
||||
NodeFields fields{0, 0, kNoValueOffset, 0};
|
||||
if (!trie.nodes || nodeIndex >= trie.nodeCount) {
|
||||
return fields;
|
||||
}
|
||||
|
||||
const uint8_t* entry = trie.nodes + nodeIndex * kNodeRecordSize;
|
||||
fields.firstEdge = readUint16LE(entry);
|
||||
fields.childCount = entry[2];
|
||||
fields.valueOffset = readUint24LE(entry + 3);
|
||||
fields.valueLength = entry[6];
|
||||
return fields;
|
||||
}
|
||||
|
||||
// Letter indexes are stored sorted, so a lower_bound gives us O(log n) lookups
|
||||
// without building auxiliary maps.
|
||||
uint32_t letterIndexForCodepoint(const SerializedTrieView& trie, const uint32_t cp) {
|
||||
if (!trie.letters || trie.letterCount == 0) {
|
||||
return SerializedTrieView::kInvalidLetterIndex;
|
||||
}
|
||||
const uint32_t* begin = trie.letters;
|
||||
const uint32_t* end = begin + trie.letterCount;
|
||||
const auto it = std::lower_bound(begin, end, cp);
|
||||
if (it == end || *it != cp) {
|
||||
return SerializedTrieView::kInvalidLetterIndex;
|
||||
}
|
||||
return static_cast<uint32_t>(it - begin);
|
||||
}
|
||||
|
||||
// Walks the child edge slice described by the node record using binary search
|
||||
// on the inlined letter indexes. Returns kInvalidNodeIndex when the path ends.
|
||||
size_t findChild(const SerializedTrieView& trie, const size_t nodeIndex, const uint32_t letter) {
|
||||
const uint32_t letterIndex = letterIndexForCodepoint(trie, letter);
|
||||
if (letterIndex == SerializedTrieView::kInvalidLetterIndex) {
|
||||
return SerializedTrieView::kInvalidNodeIndex;
|
||||
}
|
||||
if (!trie.edgeChildren || !trie.edgeLetters) {
|
||||
return SerializedTrieView::kInvalidNodeIndex;
|
||||
}
|
||||
|
||||
const NodeFields node = loadNode(trie, nodeIndex);
|
||||
size_t low = 0;
|
||||
size_t high = node.childCount;
|
||||
while (low < high) {
|
||||
const size_t mid = low + ((high - low) >> 1);
|
||||
const size_t edgeIndex = static_cast<size_t>(node.firstEdge) + mid;
|
||||
if (edgeIndex >= trie.edgeCount) {
|
||||
return SerializedTrieView::kInvalidNodeIndex;
|
||||
}
|
||||
const uint32_t entryLetterIndex = readEdgeLetterIndex(trie, edgeIndex);
|
||||
if (entryLetterIndex == letterIndex) {
|
||||
const uint8_t* childPtr = trie.edgeChildren + edgeIndex * sizeof(uint16_t);
|
||||
return readUint16LE(childPtr);
|
||||
}
|
||||
if (entryLetterIndex < letterIndex) {
|
||||
low = mid + 1;
|
||||
} else {
|
||||
high = mid;
|
||||
}
|
||||
}
|
||||
return SerializedTrieView::kInvalidNodeIndex;
|
||||
}
|
||||
|
||||
// Merges the pattern's numeric priorities into the global score array (max per slot).
|
||||
void applyPatternValues(const SerializedTrieView& trie, const NodeFields& node,
|
||||
const size_t startCharIndex, std::vector<uint8_t>& scores) {
|
||||
if (node.valueLength == 0 || node.valueOffset == kNoValueOffset || !trie.values ||
|
||||
node.valueOffset >= trie.valueBytes) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t availableBytes = trie.valueBytes - node.valueOffset;
|
||||
const size_t packedBytesNeeded = (static_cast<size_t>(node.valueLength) + 1) >> 1;
|
||||
const size_t packedBytes = std::min<size_t>(packedBytesNeeded, availableBytes);
|
||||
const uint8_t* packedValues = trie.values + node.valueOffset;
|
||||
// Value digits remain nibble-encoded (two per byte) to keep flash usage low;
|
||||
// expand back to single scores just before applying them.
|
||||
for (size_t valueIdx = 0; valueIdx < node.valueLength; ++valueIdx) {
|
||||
const size_t packedIndex = valueIdx >> 1;
|
||||
if (packedIndex >= packedBytes) {
|
||||
break;
|
||||
}
|
||||
const uint8_t packedByte = packedValues[packedIndex];
|
||||
const uint8_t value = (valueIdx & 1u) ? static_cast<uint8_t>((packedByte >> 4) & 0x0Fu)
|
||||
: static_cast<uint8_t>(packedByte & 0x0Fu);
|
||||
const size_t scoreIdx = startCharIndex + valueIdx;
|
||||
if (scoreIdx >= scores.size()) {
|
||||
break;
|
||||
}
|
||||
scores[scoreIdx] = std::max(scores[scoreIdx], value);
|
||||
}
|
||||
}
|
||||
|
||||
// Converts odd score positions back into codepoint indexes, honoring min prefix/suffix constraints.
|
||||
// By iterating over codepoint indexes rather than raw byte offsets we naturally support UTF-8 input
|
||||
// without bookkeeping gymnastics. Each break corresponds to scores[breakIndex + 1] because of the
|
||||
// leading '.' sentinel emitted in buildAugmentedWord().
|
||||
std::vector<size_t> collectBreakIndexes(const std::vector<CodepointInfo>& cps, const std::vector<uint8_t>& scores,
|
||||
const size_t minPrefix, const size_t minSuffix) {
|
||||
std::vector<size_t> indexes;
|
||||
const size_t cpCount = cps.size();
|
||||
if (cpCount < 2) {
|
||||
return indexes;
|
||||
}
|
||||
|
||||
for (size_t breakIndex = 1; breakIndex < cpCount; ++breakIndex) {
|
||||
if (breakIndex < minPrefix) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t suffixCount = cpCount - breakIndex;
|
||||
if (suffixCount < minSuffix) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t scoreIdx = breakIndex + 1; // Account for leading '.' sentinel.
|
||||
if (scoreIdx >= scores.size()) {
|
||||
break;
|
||||
}
|
||||
if ((scores[scoreIdx] & 1u) == 0) {
|
||||
continue;
|
||||
}
|
||||
indexes.push_back(breakIndex);
|
||||
}
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::vector<size_t> liangBreakIndexes(const std::vector<CodepointInfo>& cps,
|
||||
const SerializedHyphenationPatterns& patterns,
|
||||
const LiangWordConfig& config) {
|
||||
// Step 1: convert the input word into the dotted UTF-8 stream the Liang algorithm expects. A return
|
||||
// value of {} means the word contained something outside the language's alphabet and should be left
|
||||
// untouched by hyphenation.
|
||||
const auto augmented = buildAugmentedWord(cps, config);
|
||||
if (augmented.empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Step 2: run the augmented word through the trie-backed pattern table so we reuse common prefixes
|
||||
// instead of rescanning the UTF-8 bytes for every substring.
|
||||
const SerializedTrieView& trie = getSerializedTrie(patterns);
|
||||
if (!trie.nodes || trie.nodeCount == 0) {
|
||||
return {};
|
||||
}
|
||||
const size_t charCount = augmented.charCount();
|
||||
std::vector<uint8_t> scores(charCount + 1, 0);
|
||||
for (size_t charStart = 0; charStart < charCount; ++charStart) {
|
||||
size_t currentNode = 0; // Root node.
|
||||
for (size_t cursor = charStart; cursor < charCount; ++cursor) {
|
||||
const uint32_t letter = augmented.chars[cursor];
|
||||
currentNode = findChild(trie, currentNode, letter);
|
||||
if (currentNode == SerializedTrieView::kInvalidNodeIndex) {
|
||||
break;
|
||||
}
|
||||
|
||||
const NodeFields node = loadNode(trie, currentNode);
|
||||
if (node.valueLength > 0 && node.valueOffset != kNoValueOffset) {
|
||||
applyPatternValues(trie, node, charStart, scores);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: translate odd-numbered score positions back into codepoint indexes, enforcing per-language
|
||||
// prefix/suffix minima so we do not produce visually awkward fragments.
|
||||
return collectBreakIndexes(cps, scores, config.minPrefix, config.minSuffix);
|
||||
}
|
||||
39
lib/Epub/Epub/hyphenation/LiangHyphenation.h
Normal file
39
lib/Epub/Epub/hyphenation/LiangHyphenation.h
Normal file
@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "HyphenationCommon.h"
|
||||
#include "SerializedHyphenationTrie.h"
|
||||
|
||||
// Encapsulates every language-specific dial the Liang algorithm needs at runtime. The helpers are
// intentionally represented as bare function pointers because we invoke them inside tight loops and
// want to avoid the overhead of std::function or functors. The minima default to the TeX-recommended
// "2/2" split but individual languages (English, for example) can override them.
struct LiangWordConfig {
  static constexpr size_t kDefaultMinPrefix = 2;
  static constexpr size_t kDefaultMinSuffix = 2;

  // Predicate used to reject non-alphabetic characters before pattern lookup. Returning false causes
  // the entire word to be skipped, matching the behavior of classic TeX hyphenation tables.
  bool (*isLetter)(uint32_t);
  // Language-specific case folding that matches how the TeX patterns were authored (usually lower-case
  // ASCII for Latin and lowercase Cyrillic for Russian). Patterns are stored in UTF-8, so this must
  // operate on Unicode scalars rather than bytes.
  uint32_t (*toLower)(uint32_t);
  // Minimum codepoints required on the left/right of any break. These correspond to TeX's
  // lefthyphenmin and righthyphenmin knobs.
  size_t minPrefix;
  size_t minSuffix;

  // Lightweight aggregate constructor so call sites can declare `const LiangWordConfig config(...)`
  // without verbose member assignment boilerplate. `prefix`/`suffix` fall back to the TeX-recommended
  // defaults when omitted.
  LiangWordConfig(bool (*letterFn)(uint32_t), uint32_t (*lowerFn)(uint32_t),
                  size_t prefix = kDefaultMinPrefix, size_t suffix = kDefaultMinSuffix)
      : isLetter(letterFn), toLower(lowerFn), minPrefix(prefix), minSuffix(suffix) {}
};
|
||||
|
||||
// Shared Liang pattern evaluator used by every language-specific hyphenator.
|
||||
std::vector<size_t> liangBreakIndexes(const std::vector<CodepointInfo>& cps,
|
||||
const SerializedHyphenationPatterns& patterns,
|
||||
const LiangWordConfig& config);
|
||||
@ -1,404 +1,9 @@
|
||||
#include "RussianHyphenator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "HyphenationLiterals.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using CyrillicLiteral = HyphenLiteralT<uint32_t>;

// Russian prefix literals, spelled as numeric Unicode scalars so the tables
// are independent of the source file's encoding. Consumed (together with the
// suffix tables below) by appendMorphologyBreaks() via appendLiteralBreaks().
constexpr uint32_t PFX_BEZ[3] = {0x0431, 0x0435, 0x0437};                             // "без"
constexpr uint32_t PFX_RAZ[3] = {0x0440, 0x0430, 0x0437};                             // "раз"
constexpr uint32_t PFX_POD[3] = {0x043F, 0x043E, 0x0434};                             // "под"
constexpr uint32_t PFX_NAD[3] = {0x043D, 0x0430, 0x0434};                             // "над"
constexpr uint32_t PFX_PERE[4] = {0x043F, 0x0435, 0x0440, 0x0435};                    // "пере"
constexpr uint32_t PFX_SVERH[5] = {0x0441, 0x0432, 0x0435, 0x0440, 0x0445};           // "сверх"
constexpr uint32_t PFX_MEZH[3] = {0x043C, 0x0435, 0x0436};                            // "меж"
constexpr uint32_t PFX_SUPER[5] = {0x0441, 0x0443, 0x043F, 0x0435, 0x0440};           // "супер"
constexpr uint32_t PFX_PRED[4] = {0x043F, 0x0440, 0x0435, 0x0434};                    // "пред"
constexpr uint32_t PFX_SAMO[4] = {0x0441, 0x0430, 0x043C, 0x043E};                    // "само"
constexpr uint32_t PFX_OBO[3] = {0x043E, 0x0431, 0x043E};                             // "обо"
constexpr uint32_t PFX_PROTIV[6] = {0x043F, 0x0440, 0x043E, 0x0442, 0x0438, 0x0432};  // "против"

constexpr std::array<CyrillicLiteral, 12> RUSSIAN_PREFIXES = {{{PFX_BEZ, 3},
                                                               {PFX_RAZ, 3},
                                                               {PFX_POD, 3},
                                                               {PFX_NAD, 3},
                                                               {PFX_PERE, 4},
                                                               {PFX_SVERH, 5},
                                                               {PFX_MEZH, 3},
                                                               {PFX_SUPER, 5},
                                                               {PFX_PRED, 4},
                                                               {PFX_SAMO, 4},
                                                               {PFX_OBO, 3},
                                                               {PFX_PROTIV, 6}}};

// Russian suffix literals, same encoding convention as the prefixes above.
constexpr uint32_t SFX_NOST[4] = {0x043D, 0x043E, 0x0441, 0x0442};                    // "ност"
constexpr uint32_t SFX_STVO[4] = {0x0441, 0x0442, 0x0432, 0x043E};                    // "ство"
constexpr uint32_t SFX_ENIE[4] = {0x0435, 0x043D, 0x0438, 0x0435};                    // "ение"
constexpr uint32_t SFX_ATION[4] = {0x0430, 0x0446, 0x0438, 0x044F};                   // "ация"
constexpr uint32_t SFX_CHIK[3] = {0x0447, 0x0438, 0x043A};                            // "чик"
constexpr uint32_t SFX_NIK[3] = {0x043D, 0x0438, 0x043A};                             // "ник"
constexpr uint32_t SFX_TEL[4] = {0x0442, 0x0435, 0x043B, 0x044C};                     // "тель"
constexpr uint32_t SFX_SKII[4] = {0x0441, 0x043A, 0x0438, 0x0439};                    // "ский"
constexpr uint32_t SFX_AL[6] = {0x0430, 0x043B, 0x044C, 0x043D, 0x044B, 0x0439};      // "альный"
constexpr uint32_t SFX_ISM[3] = {0x0438, 0x0437, 0x043C};                             // "изм"
constexpr uint32_t SFX_LIV[5] = {0x043B, 0x0438, 0x0432, 0x044B, 0x0439};             // "ливый"
constexpr uint32_t SFX_OST[4] = {0x043E, 0x0441, 0x0442, 0x044C};                     // "ость"

constexpr std::array<CyrillicLiteral, 12> RUSSIAN_SUFFIXES = {{{SFX_NOST, 4},
                                                               {SFX_STVO, 4},
                                                               {SFX_ENIE, 4},
                                                               {SFX_ATION, 4},
                                                               {SFX_CHIK, 3},
                                                               {SFX_NIK, 3},
                                                               {SFX_TEL, 4},
                                                               {SFX_SKII, 4},
                                                               {SFX_AL, 6},
                                                               {SFX_ISM, 3},
                                                               {SFX_LIV, 5},
                                                               {SFX_OST, 4}}};
|
||||
|
||||
std::vector<uint32_t> lowercaseCyrillicWord(const std::vector<CodepointInfo>& cps) {
|
||||
std::vector<uint32_t> lower;
|
||||
lower.reserve(cps.size());
|
||||
for (const auto& info : cps) {
|
||||
lower.push_back(isCyrillicLetter(info.value) ? toLowerCyrillic(info.value) : info.value);
|
||||
}
|
||||
return lower;
|
||||
}
|
||||
|
||||
bool russianSegmentHasVowel(const std::vector<CodepointInfo>& cps, const size_t start, const size_t end) {
|
||||
if (start >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
const size_t clampedEnd = std::min(end, cps.size());
|
||||
for (size_t i = start; i < clampedEnd; ++i) {
|
||||
if (isCyrillicVowel(cps[i].value)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool exposesLeadingDoubleConsonant(const std::vector<CodepointInfo>& cps, const size_t index) {
|
||||
if (index + 1 >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
const auto first = cps[index].value;
|
||||
const auto second = cps[index + 1].value;
|
||||
if (!isCyrillicConsonant(first) || !isCyrillicConsonant(second)) {
|
||||
return false;
|
||||
}
|
||||
if (toLowerCyrillic(first) != toLowerCyrillic(second)) {
|
||||
return false;
|
||||
}
|
||||
const bool hasLeftVowel = index > 0 && isCyrillicVowel(cps[index - 1].value);
|
||||
const bool hasRightVowel = (index + 2 < cps.size()) && isCyrillicVowel(cps[index + 2].value);
|
||||
return hasLeftVowel && hasRightVowel;
|
||||
}
|
||||
|
||||
bool exposesTrailingDoubleConsonant(const std::vector<CodepointInfo>& cps, const size_t index) {
|
||||
if (index < 2) {
|
||||
return false;
|
||||
}
|
||||
const auto last = cps[index - 1].value;
|
||||
const auto prev = cps[index - 2].value;
|
||||
if (!isCyrillicConsonant(last) || !isCyrillicConsonant(prev)) {
|
||||
return false;
|
||||
}
|
||||
if (toLowerCyrillic(last) != toLowerCyrillic(prev)) {
|
||||
return false;
|
||||
}
|
||||
const bool hasLeftVowel = (index >= 3) && isCyrillicVowel(cps[index - 3].value);
|
||||
const bool hasRightVowel = (index < cps.size()) && isCyrillicVowel(cps[index].value);
|
||||
return hasLeftVowel && hasRightVowel;
|
||||
}
|
||||
|
||||
bool violatesDoubleConsonantRule(const std::vector<CodepointInfo>& cps, const size_t index) {
|
||||
return exposesLeadingDoubleConsonant(cps, index) || exposesTrailingDoubleConsonant(cps, index);
|
||||
}
|
||||
|
||||
// Checks if the codepoint is the Cyrillic soft sign (ь).
|
||||
bool isSoftSign(uint32_t cp) { return toLowerCyrillic(cp) == 0x044C; }
|
||||
|
||||
// Checks if the codepoint is the Cyrillic hard sign (ъ).
|
||||
bool isHardSign(uint32_t cp) { return toLowerCyrillic(cp) == 0x044A; }
|
||||
|
||||
// Checks if the codepoint is either the Cyrillic soft sign (ь) or hard sign (ъ).
|
||||
bool isSoftOrHardSign(uint32_t cp) { return isSoftSign(cp) || isHardSign(cp); }
|
||||
|
||||
// Checks if the codepoint is the Cyrillic short i (й).
|
||||
bool isCyrillicShortI(uint32_t cp) { return toLowerCyrillic(cp) == 0x0439; }
|
||||
|
||||
// Checks if the codepoint is the Cyrillic yeru (ы).
|
||||
bool isCyrillicYeru(uint32_t cp) { return toLowerCyrillic(cp) == 0x044B; }
|
||||
|
||||
// Checks if the codepoint is a Russian prefix consonant that can start certain clusters.
|
||||
bool isRussianPrefixConsonant(uint32_t cp) {
|
||||
cp = toLowerCyrillic(cp);
|
||||
return cp == 0x0432 || cp == 0x0437 || cp == 0x0441; // в, з, с
|
||||
}
|
||||
|
||||
// Checks if the codepoint is a Russian sibilant consonant.
|
||||
bool isRussianSibilant(uint32_t cp) {
|
||||
cp = toLowerCyrillic(cp);
|
||||
switch (cp) {
|
||||
case 0x0437: // з
|
||||
case 0x0441: // с
|
||||
case 0x0436: // ж
|
||||
case 0x0448: // ш
|
||||
case 0x0449: // щ
|
||||
case 0x0447: // ч
|
||||
case 0x0446: // ц
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Checks if the codepoint is a Russian stop consonant.
|
||||
bool isRussianStop(uint32_t cp) {
|
||||
cp = toLowerCyrillic(cp);
|
||||
switch (cp) {
|
||||
case 0x0431: // б
|
||||
case 0x0433: // г
|
||||
case 0x0434: // д
|
||||
case 0x043F: // п
|
||||
case 0x0442: // т
|
||||
case 0x043A: // к
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the sonority rank of a Russian consonant for syllable onset validation.
|
||||
int russianSonority(uint32_t cp) {
|
||||
cp = toLowerCyrillic(cp);
|
||||
switch (cp) {
|
||||
case 0x043B: // л
|
||||
case 0x0440: // р
|
||||
case 0x0439: // й
|
||||
return 4;
|
||||
case 0x043C: // м
|
||||
case 0x043D: // н
|
||||
return 3;
|
||||
case 0x0432: // в
|
||||
case 0x0437: // з
|
||||
case 0x0436: // ж
|
||||
return 2;
|
||||
case 0x0444: // ф
|
||||
case 0x0441: // с
|
||||
case 0x0448: // ш
|
||||
case 0x0449: // щ
|
||||
case 0x0447: // ч
|
||||
case 0x0446: // ц
|
||||
case 0x0445: // х
|
||||
return 1;
|
||||
case 0x0431: // б
|
||||
case 0x0433: // г
|
||||
case 0x0434: // д
|
||||
case 0x043F: // п
|
||||
case 0x0442: // т
|
||||
case 0x043A: // к
|
||||
return 0;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Applies Russian sonority sequencing to ensure the consonant cluster can start a syllable.
|
||||
bool russianClusterIsValidOnset(const std::vector<CodepointInfo>& cps, const size_t start, const size_t end) {
|
||||
if (start >= end) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
const auto cp = cps[i].value;
|
||||
if (!isCyrillicConsonant(cp) || isSoftOrHardSign(cp)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (end - start == 1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = start; i + 1 < end; ++i) {
|
||||
const uint32_t current = cps[i].value;
|
||||
const uint32_t next = cps[i + 1].value;
|
||||
const int currentRank = russianSonority(current);
|
||||
const int nextRank = russianSonority(next);
|
||||
if (currentRank > nextRank) {
|
||||
const bool atClusterStart = (i == start);
|
||||
const bool prefixAllowance = atClusterStart && isRussianPrefixConsonant(current);
|
||||
const bool sibilantAllowance = isRussianSibilant(current) && isRussianStop(next);
|
||||
if (!prefixAllowance && !sibilantAllowance) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Identifies splits within double consonant clusters.
|
||||
size_t doubleConsonantSplit(const std::vector<CodepointInfo>& cps, const size_t clusterStart, const size_t clusterEnd) {
|
||||
for (size_t i = clusterStart; i + 1 < clusterEnd; ++i) {
|
||||
const auto left = cps[i].value;
|
||||
const auto right = cps[i + 1].value;
|
||||
if (isCyrillicConsonant(left) && toLowerCyrillic(left) == toLowerCyrillic(right) && !isSoftOrHardSign(right)) {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
return std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
// Prevents breaks that would create forbidden suffixes.
|
||||
bool beginsWithForbiddenSuffix(const std::vector<CodepointInfo>& cps, const size_t index) {
|
||||
if (index >= cps.size()) {
|
||||
return true;
|
||||
}
|
||||
const auto cp = cps[index].value;
|
||||
return isSoftOrHardSign(cp) || isCyrillicShortI(cp) || isCyrillicYeru(cp);
|
||||
}
|
||||
|
||||
// Validates whether a hyphenation break is allowed at the specified index.
|
||||
bool russianBreakAllowed(const std::vector<CodepointInfo>& cps, const size_t breakIndex) {
|
||||
if (breakIndex == 0 || breakIndex >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t prefixLen = breakIndex;
|
||||
const size_t suffixLen = cps.size() - breakIndex;
|
||||
if (prefixLen < 2 || suffixLen < 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!russianSegmentHasVowel(cps, 0, breakIndex) || !russianSegmentHasVowel(cps, breakIndex, cps.size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (beginsWithForbiddenSuffix(cps, breakIndex)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (violatesDoubleConsonantRule(cps, breakIndex)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Chooses the longest valid onset contained within the inter-vowel cluster.
|
||||
size_t russianOnsetLength(const std::vector<CodepointInfo>& cps, const size_t clusterStart, const size_t clusterEnd) {
|
||||
const size_t clusterLen = clusterEnd - clusterStart;
|
||||
if (clusterLen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const size_t maxLen = std::min<size_t>(4, clusterLen);
|
||||
for (size_t len = maxLen; len >= 1; --len) {
|
||||
const size_t suffixStart = clusterEnd - len;
|
||||
if (russianClusterIsValidOnset(cps, suffixStart, clusterEnd)) {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Prevents hyphenation splits immediately beside ь/ъ characters.
|
||||
bool nextToSoftSign(const std::vector<CodepointInfo>& cps, const size_t index) {
|
||||
if (index == 0 || index >= cps.size()) {
|
||||
return false;
|
||||
}
|
||||
const auto left = cps[index - 1].value;
|
||||
const auto right = cps[index].value;
|
||||
return isSoftOrHardSign(left) || isSoftOrHardSign(right);
|
||||
}
|
||||
|
||||
// Adds break candidates at known morpheme boundaries (RUSSIAN_PREFIXES /
// RUSSIAN_SUFFIXES), filtering each candidate through russianBreakAllowed().
// `lowerWord` must be the case-folded form of `cps` (see lowercaseCyrillicWord)
// so literal matching is case-insensitive. New indexes are appended to
// `indexes`; deduplication is the caller's responsibility.
void appendMorphologyBreaks(const std::vector<CodepointInfo>& cps, const std::vector<uint32_t>& lowerWord,
                            std::vector<size_t>& indexes) {
  appendLiteralBreaks(
      lowerWord, RUSSIAN_PREFIXES, RUSSIAN_SUFFIXES,
      [&](const size_t breakIndex) { return russianBreakAllowed(cps, breakIndex); }, indexes);
}
|
||||
|
||||
// Produces syllable break indexes tailored to Russian phonotactics.
// Strategy: find every inter-vowel region, choose one break per region
// (doubled-consonant split preferred, otherwise longest-valid-onset), then
// merge in morphology-driven breaks and deduplicate.
std::vector<size_t> russianBreakIndexes(const std::vector<CodepointInfo>& cps) {
  std::vector<size_t> indexes;
  const size_t wordSize = cps.size();

  // Collect vowel positions.
  std::vector<size_t> vowelPositions;
  vowelPositions.reserve(wordSize / 2); // Typical estimate: ~50% vowels
  for (size_t i = 0; i < wordSize; ++i) {
    if (isCyrillicVowel(cps[i].value)) {
      vowelPositions.push_back(i);
    }
  }

  // Need at least 2 vowels to create a syllable break.
  if (vowelPositions.size() < 2) {
    return indexes;
  }

  // Process inter-vowel clusters for hyphenation points.
  for (size_t v = 0; v + 1 < vowelPositions.size(); ++v) {
    const size_t leftVowel = vowelPositions[v];
    const size_t rightVowel = vowelPositions[v + 1];
    // NOTE(review): suffixLen is measured from rightVowel even when the chosen
    // breakIndex falls earlier inside the cluster — confirm this is intended.
    const size_t suffixLen = wordSize - rightVowel;

    // Adjacent vowels: can break between them if constraints allow.
    if (rightVowel - leftVowel == 1) {
      if (rightVowel >= MIN_PREFIX_CP && suffixLen >= MIN_SUFFIX_CP && !nextToSoftSign(cps, rightVowel) &&
          russianBreakAllowed(cps, rightVowel)) {
        indexes.push_back(rightVowel);
      }
      continue;
    }

    // Consonant cluster between vowels: find optimal break point.
    const size_t clusterStart = leftVowel + 1;
    const size_t clusterEnd = rightVowel;

    // Try double consonant split first (preferred).
    size_t breakIndex = doubleConsonantSplit(cps, clusterStart, clusterEnd);

    // Fall back to onset-based split: leave the longest valid onset with the
    // following syllable.
    if (breakIndex == std::numeric_limits<size_t>::max()) {
      const size_t onsetLen = russianOnsetLength(cps, clusterStart, clusterEnd);
      breakIndex = clusterEnd - onsetLen;
    }

    // Validate candidate break point.
    if (breakIndex < MIN_PREFIX_CP || suffixLen < MIN_SUFFIX_CP || nextToSoftSign(cps, breakIndex) ||
        !russianBreakAllowed(cps, breakIndex)) {
      continue;
    }

    indexes.push_back(breakIndex);
  }

  // Morphology pass: add prefix/suffix-boundary breaks, then sort and
  // deduplicate only when that pass actually contributed new entries (the
  // phonotactic pass above emits indexes in ascending order already).
  const auto lowerWord = lowercaseCyrillicWord(cps);
  const size_t preDedupeCount = indexes.size();
  appendMorphologyBreaks(cps, lowerWord, indexes);

  if (indexes.size() > preDedupeCount) {
    std::sort(indexes.begin(), indexes.end());
    indexes.erase(std::unique(indexes.begin(), indexes.end()), indexes.end());
  }

  return indexes;
}
|
||||
|
||||
} // namespace
|
||||
#include "LiangHyphenation.h"
|
||||
#include "generated/hyph-ru-ru.trie.h"
|
||||
|
||||
const RussianHyphenator& RussianHyphenator::instance() {
|
||||
static RussianHyphenator instance;
|
||||
@ -406,5 +11,9 @@ const RussianHyphenator& RussianHyphenator::instance() {
|
||||
}
|
||||
|
||||
std::vector<size_t> RussianHyphenator::breakIndexes(const std::vector<CodepointInfo>& cps) const {
|
||||
return russianBreakIndexes(cps);
|
||||
// Russian uses the same Liang runtime but needs Cyrillic-aware helpers plus symmetrical
|
||||
// lefthyphenmin/righthyphenmin values. Most Russian TeX distributions stick with 2/2, which keeps
|
||||
// short words readable while still allowing frequent hyphenation opportunities.
|
||||
const LiangWordConfig config(isCyrillicLetter, toLowerCyrillic, minPrefix(), minSuffix());
|
||||
return liangBreakIndexes(cps, ru_ru_patterns, config);
|
||||
}
|
||||
|
||||
@ -8,6 +8,8 @@ class RussianHyphenator final : public LanguageHyphenator {
|
||||
static const RussianHyphenator& instance();
|
||||
|
||||
// Computes permissible break positions for a Russian word (Liang patterns).
std::vector<size_t> breakIndexes(const std::vector<CodepointInfo>& cps) const override;
// TeX-style lefthyphenmin/righthyphenmin: at least two codepoints must remain
// on each side of any break.
size_t minPrefix() const override { return 2; }
size_t minSuffix() const override { return 2; }
|
||||
|
||||
private:
|
||||
RussianHyphenator() = default;
|
||||
|
||||
10
lib/Epub/Epub/hyphenation/SerializedHyphenationTrie.h
Normal file
10
lib/Epub/Epub/hyphenation/SerializedHyphenationTrie.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// Lightweight descriptor that points at a serialized Liang hyphenation trie stored in flash.
|
||||
struct SerializedHyphenationPatterns {
|
||||
const std::uint8_t* data;
|
||||
size_t size;
|
||||
};
|
||||
10056
lib/Epub/Epub/hyphenation/generated/hyph-de.trie.h
Normal file
10056
lib/Epub/Epub/hyphenation/generated/hyph-de.trie.h
Normal file
File diff suppressed because it is too large
Load Diff
3162
lib/Epub/Epub/hyphenation/generated/hyph-en-us.trie.h
Normal file
3162
lib/Epub/Epub/hyphenation/generated/hyph-en-us.trie.h
Normal file
File diff suppressed because it is too large
Load Diff
2071
lib/Epub/Epub/hyphenation/generated/hyph-ru-ru.trie.h
Normal file
2071
lib/Epub/Epub/hyphenation/generated/hyph-ru-ru.trie.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,9 @@
|
||||
[platformio]
default_envs = default
|
||||
|
||||
[crosspoint]
|
||||
crosspoint_version = 0.12.0
|
||||
|
||||
[base]
|
||||
platform = espressif32 @ 6.12.0
|
||||
board = esp32-c3-devkitm-1
|
||||
@ -50,10 +52,10 @@ lib_deps =
|
||||
extends = base
|
||||
build_flags =
|
||||
${base.build_flags}
|
||||
-DCROSSPOINT_VERSION=\"${crosspoint.crosspoint_version}-dev\"
|
||||
|
||||
[env:gh_release]
|
||||
extends = base
|
||||
build_flags =
|
||||
${base.build_flags}
|
||||
-DCROSSPOINT_VERSION=\"${crosspoint.crosspoint_version}\"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user