Cleanup TextBlock functions

2026-02-06 15:47:39 +03:00 · 2025-12-06 20:33:45 +11:00 · 2025-12-06 20:33:45 +11:00 · d5a220f0ad
commit d5a220f0ad
parent 74f7ea9650
3 changed files with 6 additions and 40 deletions
--- a/lib/Epub/Epub/EpubHtmlParserSlim.cpp
+++ b/lib/Epub/Epub/EpubHtmlParserSlim.cpp
@ -24,6 +24,8 @@ constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
 const char* SKIP_TAGS[] = {"head", "table"};
 constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
 // Returns true when c is one of the characters XML treats as whitespace:
 // space, tab, carriage return or line feed. Tab is included so that
 // tab-separated text is split into words the same way as space-separated text
 // instead of the tab being kept as part of a word.
 bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; }
 // given the start and end of a tag, check to see if it matches a known tag
 bool matches_s(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
  for (int i = 0; i < possible_tag_count; i++) {
@ -119,17 +121,19 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
  }
  for (int i = 0; i < len; i++) {
-    // TODO: Extract check
+    if (isWhitespace(s[i])) {
-    if (s[i] == ' ' || s[i] == '\r' || s[i] == '\n') {
+      // Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
      if (self->partWordBufferIndex > 0) {
        self->partWordBuffer[self->partWordBufferIndex] = '\0';
        self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
                                        self->italicUntilDepth < self->depth);
        self->partWordBufferIndex = 0;
      }
      // Skip the whitespace char
      continue;
    }
    // If we're about to run out of space, then cut the word off and start a new one
    if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) {
      self->partWordBuffer[self->partWordBufferIndex] = '\0';
      self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
--- a/lib/Epub/Epub/blocks/TextBlock.cpp
+++ b/lib/Epub/Epub/blocks/TextBlock.cpp
@ -3,43 +3,6 @@
 #include <EpdRenderer.h>
 #include <Serialization.h>
 // Returns true when c separates words: space, tab, carriage return or line
 // feed. Tab is included because XML treats it as whitespace just like the
 // other three, so it must not end up inside a word.
 static bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; }
 // Advance past every character that is part of the current word.
 // Starting at `index`, returns the position of the first whitespace character
 // found, or `length` when the word runs to the end of the text.
 static int skipWord(const std::string& text, int index, const int length) {
  for (; index < length; ++index) {
    if (isWhitespace(text[index])) {
      break;
    }
  }
  return index;
 }
 // Advance past a run of whitespace characters.
 // Starting at `index`, returns the position of the first non-whitespace
 // character found, or `length` when only whitespace remains.
 static int skipWhitespace(const std::string& html, int index, const int length) {
  for (; index < length; ++index) {
    if (!isWhitespace(html[index])) {
      break;
    }
  }
  return index;
 }
 // Splits `span` on whitespace and passes each piece to addWord() together with
 // the given bold/italic flags. The span itself is never modified.
 void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) {
  const int spanLength = span.length();
  for (int cursor = 0; cursor < spanLength;) {
    // the next word begins at the first non-whitespace character
    const int wordBegin = skipWhitespace(span, cursor, spanLength);
    // ...and extends up to the next whitespace character or the end of the span
    cursor = skipWord(span, wordBegin, spanLength);
    // when only whitespace was left, this hands addWord() an empty string
    addWord(span.substr(wordBegin, cursor - wordBegin), is_bold, is_italic);
  }
 }
 void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) {
  if (word.length() == 0) return;
--- a/lib/Epub/Epub/blocks/TextBlock.h
+++ b/lib/Epub/Epub/blocks/TextBlock.h
@ -36,7 +36,6 @@ class TextBlock final : public Block {
                     const std::vector<uint8_t>& word_styles, const BLOCK_STYLE style)
      : words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {}
  ~TextBlock() override = default;
  void addSpan(const std::string& span, bool is_bold, bool is_italic);
  void addWord(const std::string& word, bool is_bold, bool is_italic);
  void setStyle(const BLOCK_STYLE style) { this->style = style; }
  BLOCK_STYLE getStyle() const { return style; }