Xteink-X4-crosspoint-reader/lib/Utf8/Utf8.cpp
Jesse Vincent e39562e1c8 feat: Add f-ligature support (ff, fi, fl, ffi, ffl)
Add ligature codepoints (U+FB00–FB06) to font conversion so all fonts
include ligature glyphs. Implement utf8NextCodepointWithLigatures() that
does lookahead substitution for f-ligature sequences. Use it in text
rendering and measurement so ligatures render correctly and word-wrapping
accounts for the ligature glyph widths.
2026-02-01 23:07:08 -08:00

85 lines
2.0 KiB
C++

#include "Utf8.h"
// Returns the byte length (1-4) of the UTF-8 sequence announced by lead byte
// `c`. Any byte that does not match a valid lead pattern (continuation bytes,
// 0xF8 and above) reports 1, which is the fallback for invalid input.
int utf8CodepointLen(const unsigned char c) {
  if ((c & 0x80) == 0x00) return 1;  // 0xxxxxxx: single byte
  if ((c & 0xE0) == 0xC0) return 2;  // 110xxxxx
  if ((c & 0xF0) == 0xE0) return 3;  // 1110xxxx
  if ((c & 0xF8) == 0xF0) return 4;  // 11110xxx
  return 1;                          // not a valid lead byte
}
// Decodes the UTF-8 sequence at *string and advances *string past it.
//
// `string` is an in/out cursor into a NUL-terminated byte buffer.
//
// Returns:
//   - 0 when the cursor is on the NUL terminator (the cursor is not moved);
//   - the raw byte when the sequence is a single byte (ASCII, or a lead byte
//     that utf8CodepointLen() does not recognise);
//   - U+FFFD when a multi-byte sequence is cut short, i.e. one of the expected
//     continuation bytes is not of the form 10xxxxxx. In that case the cursor
//     is left ON the offending byte so the next call re-examines it. This
//     keeps a truncated sequence at the end of the buffer from carrying the
//     cursor past the NUL terminator;
//   - otherwise the decoded codepoint.
uint32_t utf8NextCodepoint(const unsigned char** string) {
  const unsigned char* const start = *string;
  if (start[0] == 0) {
    return 0;
  }

  const int bytes = utf8CodepointLen(start[0]);
  if (bytes == 1) {
    *string = start + 1;
    return start[0];
  }

  // Keep only the payload bits of the lead byte (5, 4 or 3 bits for
  // 2-, 3- and 4-byte sequences respectively).
  uint32_t cp = start[0] & ((1u << (7 - bytes)) - 1);
  for (int i = 1; i < bytes; i++) {
    // Check each byte BEFORE consuming it: a NUL or a new lead byte here
    // means the sequence is truncated, and it must not be skipped over.
    if ((start[i] & 0xC0) != 0x80) {
      *string = start + i;
      return 0xFFFD;  // U+FFFD REPLACEMENT CHARACTER
    }
    cp = (cp << 6) | (start[i] & 0x3F);
  }

  *string = start + bytes;
  return cp;
}
// Like utf8NextCodepoint(), but folds f-ligature sequences into a single
// presentation-form codepoint by looking ahead after an 'f':
//   ff -> U+FB00, fi -> U+FB01, fl -> U+FB02, ffi -> U+FB03, ffl -> U+FB04
// The cursor *string ends up after every character that was folded into the
// returned codepoint; characters read during lookahead that did not become
// part of a ligature are handed back by restoring the cursor.
uint32_t utf8NextCodepointWithLigatures(const unsigned char** string) {
  const uint32_t first = utf8NextCodepoint(string);
  if (first != 'f') {
    return first;
  }

  // Where to resume if the character after 'f' does not form a ligature.
  const unsigned char* const resumeAfterF = *string;

  switch (utf8NextCodepoint(string)) {
    case 'i':
      return 0xFB01;  // fi
    case 'l':
      return 0xFB02;  // fl
    case 'f': {
      // Got "ff"; one more character decides between ff, ffi and ffl.
      const unsigned char* const resumeAfterFF = *string;
      const uint32_t third = utf8NextCodepoint(string);
      if (third == 'i') {
        return 0xFB03;  // ffi
      }
      if (third == 'l') {
        return 0xFB04;  // ffl
      }
      // Plain "ff": hand the third character back.
      *string = resumeAfterFF;
      return 0xFB00;  // ff
    }
    default:
      // Lone 'f': hand the second character back.
      *string = resumeAfterF;
      return 'f';
  }
}
// Removes the last UTF-8 character from `str` in place and returns the new
// length in bytes. An empty string is left untouched and yields 0.
//
// The cut point is found by stepping back from the final byte over every
// continuation byte (10xxxxxx) until a non-continuation byte or index 0 is
// reached.
size_t utf8RemoveLastChar(std::string& str) {
  size_t cut = str.size();
  if (cut == 0) {
    return 0;
  }

  do {
    --cut;
  } while (cut > 0 && (static_cast<unsigned char>(str[cut]) & 0xC0) == 0x80);

  str.erase(cut);
  return cut;
}
// Drops up to `numChars` UTF-8 characters from the end of `str`, in place.
// Stops early once the string becomes empty.
void utf8TruncateChars(std::string& str, const size_t numChars) {
  size_t remaining = numChars;
  while (remaining > 0 && !str.empty()) {
    utf8RemoveLastChar(str);
    --remaining;
  }
}