Clean up TextBlock functions

This commit is contained in:
Dave Allie 2025-12-06 20:33:45 +11:00
parent 74f7ea9650
commit d5a220f0ad
No known key found for this signature in database
GPG Key ID: F2FDDB3AD8D0276F
3 changed files with 6 additions and 40 deletions

View File

@ -24,6 +24,8 @@ constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
// Tag names that make up the "skip" set: "head" and "table".
// NOTE(review): presumably elements whose content the parser ignores — confirm against where SKIP_TAGS is used.
const char* SKIP_TAGS[] = {"head", "table"};
// Number of entries in SKIP_TAGS, derived from the array itself so it stays in sync when tags are added.
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
// Returns true when `c` is a whitespace character: space, horizontal tab,
// carriage return or line feed (the XML 1.0 whitespace set).
// Tab is included so that tab-indented or tab-separated markup is treated as
// a word boundary instead of being carried into the word text.
bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; }
// given a tag name, check to see if it matches one of the possible tags
bool matches_s(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
for (int i = 0; i < possible_tag_count; i++) {
@ -119,17 +121,19 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
}
for (int i = 0; i < len; i++) {
// TODO: Extract check
if (s[i] == ' ' || s[i] == '\r' || s[i] == '\n') {
if (isWhitespace(s[i])) {
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
if (self->partWordBufferIndex > 0) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
self->italicUntilDepth < self->depth);
self->partWordBufferIndex = 0;
}
// Skip the whitespace char
continue;
}
// If we're about to run out of space, then cut the word off and start a new one
if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) {
self->partWordBuffer[self->partWordBufferIndex] = '\0';
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,

View File

@ -3,43 +3,6 @@
#include <EpdRenderer.h>
#include <Serialization.h>
// Returns true when `c` is a whitespace character: space, horizontal tab,
// carriage return or line feed (the XML 1.0 whitespace set).
// Tab is included so that tab-separated text is split into words rather than
// keeping the tab inside a word.
static bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; }
// Move past anything that should be considered part of a word.
// Starting at `index`, advances until a whitespace character or `length` is reached
// and returns that position; `index` is returned unchanged if it is already at or past `length`.
static int skipWord(const std::string& text, int index, const int length) {
  for (; index < length; ++index) {
    if (isWhitespace(text[index])) {
      break;
    }
  }
  return index;
}
// Skip past any whitespace characters.
// Starting at `index`, advances until a non-whitespace character or `length` is reached
// and returns that position; `index` is returned unchanged if it is already at or past `length`.
static int skipWhitespace(const std::string& html, int index, const int length) {
  for (; index < length; ++index) {
    if (!isWhitespace(html[index])) {
      break;
    }
  }
  return index;
}
// Splits `span` into whitespace-separated words and hands each one to addWord()
// together with the given bold/italic flags. The span itself is never modified;
// each word is passed on as a substring copy.
void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) {
  const int spanLength = span.length();

  int cursor = 0;
  while (cursor < spanLength) {
    // Find where the next word begins, then where it ends.
    const int wordBegin = skipWhitespace(span, cursor, spanLength);
    cursor = skipWord(span, wordBegin, spanLength);

    // Trailing whitespace yields a zero-length substring here, which is still passed on to addWord().
    addWord(span.substr(wordBegin, cursor - wordBegin), is_bold, is_italic);
  }
}
void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) {
if (word.length() == 0) return;

View File

@ -36,7 +36,6 @@ class TextBlock final : public Block {
const std::vector<uint8_t>& word_styles, const BLOCK_STYLE style)
: words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {}
~TextBlock() override = default;
void addSpan(const std::string& span, bool is_bold, bool is_italic);
void addWord(const std::string& word, bool is_bold, bool is_italic);
void setStyle(const BLOCK_STYLE style) { this->style = style; }
BLOCK_STYLE getStyle() const { return style; }