diff --git a/lib/Epub/Epub/EpubHtmlParserSlim.cpp b/lib/Epub/Epub/EpubHtmlParserSlim.cpp
index 783384d..a520b7d 100644
--- a/lib/Epub/Epub/EpubHtmlParserSlim.cpp
+++ b/lib/Epub/Epub/EpubHtmlParserSlim.cpp
@@ -24,6 +24,8 @@ constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
const char* SKIP_TAGS[] = {"head", "table"};
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
+bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n'; }  // True only for space, CR, or LF. NOTE(review): tab ('\t') is not matched - confirm that is intended.
+
// given the start and end of a tag, check to see if it matches a known tag
bool matches_s(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
for (int i = 0; i < possible_tag_count; i++) {
@@ -119,17 +121,19 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
}
for (int i = 0; i < len; i++) {
- // TODO: Extract check
- if (s[i] == ' ' || s[i] == '\r' || s[i] == '\n') {
+ if (isWhitespace(s[i])) {
+ // Currently looking at whitespace; if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
self->italicUntilDepth < self->depth);
self->partWordBufferIndex = 0;
}
+ // Skip the whitespace char
continue;
}
+ // If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
diff --git a/lib/Epub/Epub/blocks/TextBlock.cpp b/lib/Epub/Epub/blocks/TextBlock.cpp
index 140b997..c6824b5 100644
--- a/lib/Epub/Epub/blocks/TextBlock.cpp
+++ b/lib/Epub/Epub/blocks/TextBlock.cpp
@@ -3,43 +3,6 @@
#include
#include
-static bool isWhitespace(const char c) { return c == ' ' || c == '\r' || c == '\n'; }
-
-// move past anything that should be considered part of a work
-static int skipWord(const std::string& text, int index, const int length) {
- while (index < length && !isWhitespace(text[index])) {
- index++;
- }
- return index;
-}
-
-// skip past any white space characters
-static int skipWhitespace(const std::string& html, int index, const int length) {
- while (index < length && isWhitespace(html[index])) {
- index++;
- }
- return index;
-}
-
-void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) {
- // adding a span to text block
- // make a copy of the text as we'll modify it
- const int length = span.length();
- // const auto text = new char[length + 1];
- // strcpy(text, span);
- // work out where each word is in the span
- int index = 0;
- while (index < length) {
- // skip past any whitespace to the start of a word
- index = skipWhitespace(span, index, length);
- const int wordStart = index;
- // find the end of the word
- index = skipWord(span, index, length);
- const int wordLength = index - wordStart;
- addWord(span.substr(wordStart, wordLength), is_bold, is_italic);
- }
-}
-
void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) {
if (word.length() == 0) return;
diff --git a/lib/Epub/Epub/blocks/TextBlock.h b/lib/Epub/Epub/blocks/TextBlock.h
index 5184b2d..90ef919 100644
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@@ -36,7 +36,6 @@ class TextBlock final : public Block {
const std::vector& word_styles, const BLOCK_STYLE style)
: words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {}
~TextBlock() override = default;
- void addSpan(const std::string& span, bool is_bold, bool is_italic);
void addWord(const std::string& word, bool is_bold, bool is_italic);
void setStyle(const BLOCK_STYLE style) { this->style = style; }
BLOCK_STYLE getStyle() const { return style; }