Xteink-X4-crosspoint-reader/lib/Utf8/Utf8.cpp
Arthur Tazhitdinov 4bb8a869e7 fix: truncating chapter titles using UTF-8 safe function (#599)
## Summary

* Truncate chapter titles using UTF-8-safe functions (Cyrillic titles
were previously split mid-codepoint)
* Refactor lib/Utf8

---

### AI Usage

While CrossPoint doesn't have restrictions on AI tools in contributing,
please be transparent about their usage as it
helps set the right context for reviewers.

Did you use AI tools to help write this code? _**< PARTIALLY >**_
2026-02-01 08:32:47 -05:00

49 lines
1.2 KiB
C++

#include "Utf8.h"
// Returns the number of bytes in the UTF-8 sequence introduced by lead byte `c`.
// Bytes that are not a valid lead byte (continuation bytes 10xxxxxx and
// 11111xxx patterns) report a length of 1 so a caller stepping through the
// data always moves forward.
int utf8CodepointLen(const unsigned char c) {
  if ((c & 0x80) == 0x00) {
    return 1;  // 0xxxxxxx
  }
  if ((c & 0xE0) == 0xC0) {
    return 2;  // 110xxxxx
  }
  if ((c & 0xF0) == 0xE0) {
    return 3;  // 1110xxxx
  }
  if ((c & 0xF8) == 0xF0) {
    return 4;  // 11110xxx
  }
  return 1;  // not a valid lead byte: treat as a single byte
}
// Decodes the UTF-8 sequence at `*string` and advances `*string` past the
// bytes that were consumed.
//
// string: in/out cursor into a NUL-terminated byte string.
//
// Returns:
//   - 0 when the cursor is on the NUL terminator (the cursor is not advanced);
//   - the byte value itself for single-byte sequences, including bytes that
//     utf8CodepointLen() classifies as invalid lead bytes;
//   - the decoded code point for a well-formed multi-byte sequence;
//   - U+FFFD when a multi-byte sequence is cut short by a byte that is not a
//     continuation byte (10xxxxxx). The cursor is then left ON that byte, so
//     the NUL terminator is never skipped and the next call resynchronizes.
//
// Overlong encodings and surrogate code points are not rejected.
uint32_t utf8NextCodepoint(const unsigned char** string) {
  const unsigned char* chr = *string;
  if (chr[0] == 0) {
    return 0;
  }

  const int bytes = utf8CodepointLen(chr[0]);
  if (bytes == 1) {
    *string = chr + 1;
    return chr[0];
  }

  const uint32_t replacementChar = 0xFFFD;

  uint32_t cp = chr[0] & ((1 << (7 - bytes)) - 1);  // mask header bits
  for (int i = 1; i < bytes; i++) {
    // Validate before reading further: a NUL terminator is not a continuation
    // byte, so this check also prevents reading past the end of the string.
    if ((chr[i] & 0xC0) != 0x80) {
      *string = chr + i;
      return replacementChar;
    }
    cp = (cp << 6) | (chr[i] & 0x3F);
  }

  *string = chr + bytes;
  return cp;
}
// Removes the last UTF-8 character from `str` in place.
// Scans backwards over trailing continuation bytes (10xxxxxx) to find the
// byte that starts the final character, then cuts the string there.
// Returns the new size of the string in bytes (0 if it was already empty).
size_t utf8RemoveLastChar(std::string& str) {
  if (str.empty()) {
    return 0;
  }

  size_t cut = str.size();
  do {
    --cut;
    // Keep stepping back while on a continuation byte; index 0 is always
    // accepted as the start regardless of its value.
  } while (cut != 0 && (static_cast<unsigned char>(str[cut]) >> 6) == 0x2);

  str.resize(cut);
  return cut;
}
// Drops up to `numChars` UTF-8 characters from the end of `str`, in place.
// Stops early once the string becomes empty.
void utf8TruncateChars(std::string& str, const size_t numChars) {
  size_t remaining = numChars;
  while (remaining != 0 && !str.empty()) {
    utf8RemoveLastChar(str);
    --remaining;
  }
}