mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2025-12-19 15:47:40 +03:00
Cleanup TextBlock functions
This commit is contained in:
parent
74f7ea9650
commit
d5a220f0ad
@ -24,6 +24,8 @@ constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
|
|||||||
const char* SKIP_TAGS[] = {"head", "table"};
|
const char* SKIP_TAGS[] = {"head", "table"};
|
||||||
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
|
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
|
||||||
|
|
||||||
|
// Returns true when c is one of the characters treated as a word separator:
// space, horizontal tab, carriage return or line feed. Tab is included because
// XML character data may use it as whitespace just like the other three.
bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; }
|
||||||
|
|
||||||
// given the start and end of a tag, check to see if it matches a known tag
|
// given the start and end of a tag, check to see if it matches a known tag
|
||||||
bool matches_s(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
|
bool matches_s(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
|
||||||
for (int i = 0; i < possible_tag_count; i++) {
|
for (int i = 0; i < possible_tag_count; i++) {
|
||||||
@ -119,17 +121,19 @@ void XMLCALL EpubHtmlParserSlim::characterData(void* userData, const XML_Char* s
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
// TODO: Extract check
|
if (isWhitespace(s[i])) {
|
||||||
if (s[i] == ' ' || s[i] == '\r' || s[i] == '\n') {
|
// Currently looking at whitespace, if there's anything in the partWordBuffer, flush it
|
||||||
if (self->partWordBufferIndex > 0) {
|
if (self->partWordBufferIndex > 0) {
|
||||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||||
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
|
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
|
||||||
self->italicUntilDepth < self->depth);
|
self->italicUntilDepth < self->depth);
|
||||||
self->partWordBufferIndex = 0;
|
self->partWordBufferIndex = 0;
|
||||||
}
|
}
|
||||||
|
// Skip the whitespace char
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we're about to run out of space, then cut the word off and start a new one
|
||||||
if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) {
|
if (self->partWordBufferIndex >= PART_WORD_BUFFER_SIZE - 2) {
|
||||||
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
self->partWordBuffer[self->partWordBufferIndex] = '\0';
|
||||||
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
|
self->currentTextBlock->addWord(replaceHtmlEntities(self->partWordBuffer), self->boldUntilDepth < self->depth,
|
||||||
|
|||||||
@ -3,43 +3,6 @@
|
|||||||
#include <EpdRenderer.h>
|
#include <EpdRenderer.h>
|
||||||
#include <Serialization.h>
|
#include <Serialization.h>
|
||||||
|
|
||||||
// Returns true when c is one of the characters treated as a word separator:
// space, horizontal tab, carriage return or line feed.
static bool isWhitespace(const char c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; }
|
|
||||||
|
|
||||||
// move past anything that should be considered part of a word
|
|
||||||
static int skipWord(const std::string& text, int index, const int length) {
|
|
||||||
while (index < length && !isWhitespace(text[index])) {
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
// skip past any white space characters
|
|
||||||
static int skipWhitespace(const std::string& html, int index, const int length) {
|
|
||||||
while (index < length && isWhitespace(html[index])) {
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextBlock::addSpan(const std::string& span, const bool is_bold, const bool is_italic) {
|
|
||||||
// adding a span to text block
|
|
||||||
// make a copy of the text as we'll modify it
|
|
||||||
const int length = span.length();
|
|
||||||
// const auto text = new char[length + 1];
|
|
||||||
// strcpy(text, span);
|
|
||||||
// work out where each word is in the span
|
|
||||||
int index = 0;
|
|
||||||
while (index < length) {
|
|
||||||
// skip past any whitespace to the start of a word
|
|
||||||
index = skipWhitespace(span, index, length);
|
|
||||||
const int wordStart = index;
|
|
||||||
// find the end of the word
|
|
||||||
index = skipWord(span, index, length);
|
|
||||||
const int wordLength = index - wordStart;
|
|
||||||
addWord(span.substr(wordStart, wordLength), is_bold, is_italic);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) {
|
void TextBlock::addWord(const std::string& word, const bool is_bold, const bool is_italic) {
|
||||||
if (word.length() == 0) return;
|
if (word.length() == 0) return;
|
||||||
|
|
||||||
|
|||||||
@ -36,7 +36,6 @@ class TextBlock final : public Block {
|
|||||||
const std::vector<uint8_t>& word_styles, const BLOCK_STYLE style)
|
const std::vector<uint8_t>& word_styles, const BLOCK_STYLE style)
|
||||||
: words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {}
|
: words(words), wordXpos(word_xpos), wordStyles(word_styles), style(style) {}
|
||||||
~TextBlock() override = default;
|
~TextBlock() override = default;
|
||||||
void addSpan(const std::string& span, bool is_bold, bool is_italic);
|
|
||||||
void addWord(const std::string& word, bool is_bold, bool is_italic);
|
void addWord(const std::string& word, bool is_bold, bool is_italic);
|
||||||
void setStyle(const BLOCK_STYLE style) { this->style = style; }
|
void setStyle(const BLOCK_STYLE style) { this->style = style; }
|
||||||
BLOCK_STYLE getStyle() const { return style; }
|
BLOCK_STYLE getStyle() const { return style; }
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user