diff --git a/lib/Epub/Epub/EpubHtmlParserSlim.cpp b/lib/Epub/Epub/EpubHtmlParserSlim.cpp index 783384d..a520b7d 100644 --- a/lib/Epub/Epub/EpubHtmlParserSlim.cpp +++ b/lib/Epub/Epub/EpubHtmlParserSlim.cpp @@ -24,6 +24,8 @@ constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]); const char* SKIP_TAGS[] = {"head", "table"}; constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]); +bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n'; } + // given the start and end of a tag, check to see if it matches a known tag bool matches_s(const char* tag_name, const char* possible_tags[], const int possible_tag_count) { for (int i = 0; i < possible_tag_count; i++) { @@ -119,17 +121,19 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s } for (int i = 0; i < len; i++) { - // TODO: Extract check - if (s[i] == ' ' || s[i] == '\r' || s[i] == '\n') { + if (isWhitespace(s[i])) { + // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it if (self->partWordBufferIndex > 0) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth, self->italicUntilDepth < self->depth); self->partWordBufferIndex = 0; } + // Skip the whitespace char continue; } + // If we're about to run out of space, then cut the word off and start a new one if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) { self->partWordBuffer[self->partWordBufferIndex] = '\0'; self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth, diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp index 140b997..c6824b5 100644 --- a/lib/Epub/Epub/blocks/TextBlock.cpp +++ b/lib/Epub/Epub/blocks/TextBlock.cpp @@ -3,43 +3,6 @@ #include #include -static bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n'; } - -// move past anything that should be considered part of a work -static int skipWord(const std::string& text, int index, const int length) { - while (index < length && !isWhitespace(text[index])) { - index++; - } - return index; -} - -// skip past any white space characters -static int skipWhitespace(const std::string& html, int index, const int length) { - while (index < length && isWhitespace(html[index])) { - index++; - } - return index; -} - -void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) { - // adding a span to text block - // make a copy of the text as we'll modify it - const int length = span.length(); - // const auto text = new char[length + 1]; - // strcpy(text, span); - // work out where each word is in the span - int index = 0; - while (index < length) { - // skip past any whitespace to the start of a word - index = skipWhitespace(span, index, length); - const int wordStart = index; - // find the end of the word - index = skipWord(span, index, length); - const int wordLength = index - wordStart; - addWord(span.substr(wordStart, wordLength), is_bold, is_italic); - } -} - void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) { if (word.length() == 0) return; diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h index 5184b2d..90ef919 100644 --- a/lib/Epub/Epub/blocks/TextBlock.h +++ b/lib/Epub/Epub/blocks/TextBlock.h @@ -36,7 +36,6 @@ class TextBlock final : public Block { const std::vector& word_styles, const BLOCK_STYLE style) : words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {} ~TextBlock() override = default; - void addSpan(const std::string& span, bool is_bold, bool is_italic); void addWord(const std::string& word, bool is_bold, bool is_italic); void setStyle(const BLOCK_STYLE style) { this->style = style; } BLOCK_STYLE getStyle() const { return style; }