Xteink-X4-crosspoint-reader/lib/Epub/Epub/hyphenation/HyphenationCommon.h

35 lines
801 B
C++

#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
struct CodepointInfo {
uint32_t value;
size_t byteOffset;
};
enum class Script { Latin, Cyrillic, Mixed };
constexpr size_t MIN_PREFIX_CP = 2;
constexpr size_t MIN_SUFFIX_CP = 2;
uint32_t toLowerLatin(uint32_t cp);
uint32_t toLowerCyrillic(uint32_t cp);
bool isLatinLetter(uint32_t cp);
bool isLatinVowel(uint32_t cp);
bool isLatinConsonant(uint32_t cp);
bool isCyrillicLetter(uint32_t cp);
bool isCyrillicVowel(uint32_t cp);
bool isCyrillicConsonant(uint32_t cp);
bool isAlphabetic(uint32_t cp);
bool isVowel(uint32_t cp);
bool isPunctuation(uint32_t cp);
bool isExplicitHyphen(uint32_t cp);
void trimSurroundingPunctuation(std::vector<CodepointInfo>& cps);
Script detectScript(const std::vector<CodepointInfo>& cps);