mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-06 23:57:39 +03:00
Skip the BOM character (U+FEFF, zero width no-break space), which sometimes appears in front of em-dashes
This commit is contained in:
parent
52995fa722
commit
68130d3c44
@ -151,6 +151,20 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip Zero Width No-Break Space / BOM (U+FEFF) = 0xEF 0xBB 0xBF
|
||||||
|
const XML_Char FEFF_BYTE_1 = static_cast<XML_Char>(0xEF);
|
||||||
|
const XML_Char FEFF_BYTE_2 = static_cast<XML_Char>(0xBB);
|
||||||
|
const XML_Char FEFF_BYTE_3 = static_cast<XML_Char>(0xBF);
|
||||||
|
|
||||||
|
if (s[i] == FEFF_BYTE_1) {
|
||||||
|
// Check if the next two bytes complete the 3-byte sequence
|
||||||
|
if ((i + 2 < len) && (s[i + 1] == FEFF_BYTE_2) && (s[i + 2] == FEFF_BYTE_3)) {
|
||||||
|
// Sequence 0xEF 0xBB 0xBF found!
|
||||||
|
i += 2; // Skip the next two bytes
|
||||||
|
continue; // Move to the next iteration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If we're about to run out of space, then cut the word off and start a new one
|
// If we're about to run out of space, then cut the word off and start a new one
|
||||||
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
|
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
|
||||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user