diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
index 5cd53293..a2ff485c 100644
--- a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
+++ b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -137,6 +137,21 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
continue;
}
+ // Drop soft hyphens (U+00AD), encoded in UTF-8 as the two bytes 0xC2 0xAD: when the
+ // pair is found, the rest of this loop iteration is skipped for both bytes.
+ const XML_Char SHY_BYTE_1 = static_cast<XML_Char>(0xC2);
+ const XML_Char SHY_BYTE_2 = static_cast<XML_Char>(0xAD);
+ // s[i + 1] is only read after i + 1 < len has been confirmed (short-circuit &&),
+ // so the lookahead never reads past the end of the buffer.
+ // NOTE(review): a 0xC2 that is the last byte of this buffer is not treated as a
+ // soft hyphen and falls through to the code below - confirm the parser never
+ // splits a multi-byte sequence across calls.
+ if (s[i] == SHY_BYTE_1 && i + 1 < len && s[i + 1] == SHY_BYTE_2) {
+   // Step over 0xC2 here; presumably the enclosing loop's own increment then moves past 0xAD.
+   i++;
+   continue;
+ }
+
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= MAX_WORD_SIZE) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';