From 43ebe9c66e2cb0d047663d88294b0e14d7c08fc1 Mon Sep 17 00:00:00 2001
From: Arthur Tazhitdinov
Date: Tue, 13 Jan 2026 23:51:17 +0500
Subject: [PATCH] fix: Enhance Latin letter case conversion and improve isLatinLetter function

---
 .../Epub/hyphenation/HyphenationCommon.cpp | 38 +++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
index 9619bc4a..ffa8b555 100644
--- a/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
+++ b/lib/Epub/Epub/hyphenation/HyphenationCommon.cpp
@@ -4,12 +4,25 @@
 
 namespace {
 
-// Convert Latin uppercase letters (A-Z) to lowercase (a-z)
+// Convert Latin uppercase letters (ASCII plus Latin-1 supplement) to lowercase
 uint32_t toLowerLatinImpl(const uint32_t cp) {
   if (cp >= 'A' && cp <= 'Z') {
     return cp - 'A' + 'a';
   }
-  return cp;
+  if ((cp >= 0x00C0 && cp <= 0x00D6) || (cp >= 0x00D8 && cp <= 0x00DE)) {
+    return cp + 0x20;
+  }
+
+  switch (cp) {
+    case 0x0152: // Œ
+      return 0x0153; // œ
+    case 0x0178: // Ÿ
+      return 0x00FF; // ÿ
+    case 0x1E9E: // ẞ
+      return 0x00DF; // ß
+    default:
+      return cp;
+  }
 }
 
 // Convert Cyrillic uppercase letters to lowercase
@@ -31,7 +44,26 @@ uint32_t toLowerLatin(const uint32_t cp) { return toLowerLatinImpl(cp); }
 
 uint32_t toLowerCyrillic(const uint32_t cp) { return toLowerCyrillicImpl(cp); }
 
-bool isLatinLetter(const uint32_t cp) { return (cp >= 'A' && cp <= 'Z') || (cp >= 'a' && cp <= 'z'); }
+bool isLatinLetter(const uint32_t cp) {
+  if ((cp >= 'A' && cp <= 'Z') || (cp >= 'a' && cp <= 'z')) {
+    return true;
+  }
+
+  if (((cp >= 0x00C0 && cp <= 0x00D6) || (cp >= 0x00D8 && cp <= 0x00F6) || (cp >= 0x00F8 && cp <= 0x00FF)) &&
+      cp != 0x00D7 && cp != 0x00F7) {
+    return true;
+  }
+
+  switch (cp) {
+    case 0x0152: // Œ
+    case 0x0153: // œ
+    case 0x0178: // Ÿ
+    case 0x1E9E: // ẞ
+      return true;
+    default:
+      return false;
+  }
+}
 
 bool isCyrillicLetter(const uint32_t cp) { return (cp >= 0x0400 && cp <= 0x052F); }