mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-04 06:37:38 +03:00
## Summary
* Truncate chapter titles using UTF-8-safe functions (Cyrillic titles were being split in the middle of a code point)
* Refactor lib/Utf8

---

### AI Usage
While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers.

Did you use AI tools to help write this code? _**< PARTIALLY >**_
49 lines
1.2 KiB
C++
49 lines
1.2 KiB
C++
#include "Utf8.h"
|
|
|
|
// Returns how many bytes the UTF-8 sequence introduced by lead byte `c`
// occupies (1 to 4), judged from the lead byte's high bits alone.
// Bytes that cannot start a sequence (continuation bytes 10xxxxxx and
// 11111xxx) are reported as length 1 so a caller always makes progress.
int utf8CodepointLen(const unsigned char c) {
  int length = 1;  // 0xxxxxxx (ASCII) and the fallback for invalid lead bytes

  if ((c & 0xE0) == 0xC0) {
    length = 2;  // 110xxxxx
  } else if ((c & 0xF0) == 0xE0) {
    length = 3;  // 1110xxxx
  } else if ((c & 0xF8) == 0xF0) {
    length = 4;  // 11110xxx
  }

  return length;
}
|
|
|
|
uint32_t utf8NextCodepoint(const unsigned char** string) {
|
|
if (**string == 0) {
|
|
return 0;
|
|
}
|
|
|
|
const int bytes = utf8CodepointLen(**string);
|
|
const uint8_t* chr = *string;
|
|
*string += bytes;
|
|
|
|
if (bytes == 1) {
|
|
return chr[0];
|
|
}
|
|
|
|
uint32_t cp = chr[0] & ((1 << (7 - bytes)) - 1); // mask header bits
|
|
|
|
for (int i = 1; i < bytes; i++) {
|
|
cp = (cp << 6) | (chr[i] & 0x3F);
|
|
}
|
|
|
|
return cp;
|
|
}
|
|
|
|
// Drops the last UTF-8 character from `str` in place and returns the new
// length of the string in bytes (0 for an empty string).
// The cut point is found by stepping back one byte and then over every
// continuation byte (10xxxxxx) until a lead/ASCII byte or index 0 is reached.
size_t utf8RemoveLastChar(std::string& str) {
  size_t cut = str.size();
  if (cut == 0) {
    return 0;
  }

  do {
    --cut;
  } while (cut != 0 && (static_cast<unsigned char>(str[cut]) & 0xC0) == 0x80);

  str.erase(cut);  // discard everything from the cut point to the end
  return cut;
}
|
|
|
|
// Truncate string by removing N UTF-8 characters from the end
|
|
void utf8TruncateChars(std::string& str, const size_t numChars) {
|
|
for (size_t i = 0; i < numChars && !str.empty(); ++i) {
|
|
utf8RemoveLastChar(str);
|
|
}
|
|
}
|