mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-05 23:27:38 +03:00
## Summary

* Adds (optional) hyphenation for the English, French, German and Russian languages.

## Additional Context

* The included hyphenation dictionaries add approximately 280 KB to the flash usage (German alone takes 200 KB).
* The trie-encoded dictionaries are adopted from the hypher project (https://github.com/typst/hypher).
* Soft hyphens (and other explicit hyphens) take precedence over dictionary-based hyphenation.

Overall, the hyphenation rules are quite aggressive, as I believe that makes more sense on our smaller screen.

---------

Co-authored-by: Dave Allie <dave@daveallie.com>
26 lines
614 B
C++
26 lines
614 B
C++
#pragma once
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
// A single code point paired with a byte offset.
//
// NOTE(review): presumably one entry per decoded code point of a word, as
// returned by collectCodepoints() -- the implementation is not visible from
// this header, so confirm there.
struct CodepointInfo {
  // Code point value. Zero-initialised so that a default-constructed entry
  // never carries an indeterminate value.
  uint32_t value{0};
  // Byte offset associated with `value`; presumably the index of the code
  // point's first byte within the source string -- TODO confirm.
  size_t byteOffset{0};
};
|
|
|
|
// Per-script lower-casing helpers. Each takes a code point and returns a code
// point.
//
// NOTE(review): only the declarations are visible here. Presumably each maps
// an upper-case letter of its script to the lower-case counterpart and
// returns every other input unchanged -- confirm against the implementation.
uint32_t toLowerLatin(uint32_t cp);
uint32_t toLowerCyrillic(uint32_t cp);
|
|
|
|
// Per-script letter predicates over a single code point.
//
// NOTE(review): the exact code point ranges accepted (e.g. whether accented
// Latin letters or non-Russian Cyrillic letters count) are defined in the
// implementation, which is not visible from this header -- confirm there.
bool isLatinLetter(uint32_t cp);
bool isCyrillicLetter(uint32_t cp);
|
|
|
|
// Code point classification predicates. Each takes one code point and reports
// whether it belongs to the named class.
//
// NOTE(review): the implementations are not visible from this header, so the
// precise membership of each class must be confirmed in the source file.

// Presumably true for letters of any supported script (likely the union of
// isLatinLetter and isCyrillicLetter) -- TODO confirm.
bool isAlphabetic(uint32_t cp);

// True when `cp` is treated as punctuation; the set is implementation-defined.
bool isPunctuation(uint32_t cp);

// Presumably true only for the ASCII digits '0'..'9' -- TODO confirm.
bool isAsciiDigit(uint32_t cp);

// True when `cp` is a hyphen written explicitly in the text. Per the change
// description, explicit hyphens take precedence over dictionary-based
// hyphenation; which code points qualify is defined in the implementation.
bool isExplicitHyphen(uint32_t cp);

// True when `cp` is a soft hyphen (presumably U+00AD -- TODO confirm). Per the
// change description, soft hyphens take precedence over dictionary-based
// hyphenation.
bool isSoftHyphen(uint32_t cp);
|
|
// Modifies `cps` in place (it is taken by non-const reference).
//
// NOTE(review): judging by the name, this presumably drops leading/trailing
// punctuation and a trailing footnote marker from the sequence; what exactly
// counts as a footnote marker is defined in the implementation -- confirm
// there.
void trimSurroundingPunctuationAndFootnote(std::vector<CodepointInfo>& cps);

// Builds the CodepointInfo sequence for `word`.
//
// NOTE(review): presumably decodes `word` as UTF-8 and records each code
// point together with its byte offset; the handling of malformed input is not
// visible from this header -- confirm against the implementation.
std::vector<CodepointInfo> collectCodepoints(const std::string& word);
|