mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2025-12-18 15:17:42 +03:00
182 lines
5.5 KiB
C++
182 lines
5.5 KiB
C++
#include "EpubHtmlParser.h"
|
|
|
|
#include <EpdRenderer.h>
|
|
#include <HardwareSerial.h>
|
|
|
|
#include "Page.h"
|
|
#include "htmlEntities.h"
|
|
|
|
const char* HEADER_TAGS[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
|
|
constexpr int NUM_HEADER_TAGS = sizeof(HEADER_TAGS) / sizeof(HEADER_TAGS[0]);
|
|
|
|
const char* BLOCK_TAGS[] = {"p", "li", "div", "br"};
|
|
constexpr int NUM_BLOCK_TAGS = sizeof(BLOCK_TAGS) / sizeof(BLOCK_TAGS[0]);
|
|
|
|
const char* BOLD_TAGS[] = {"b"};
|
|
constexpr int NUM_BOLD_TAGS = sizeof(BOLD_TAGS) / sizeof(BOLD_TAGS[0]);
|
|
|
|
const char* ITALIC_TAGS[] = {"i"};
|
|
constexpr int NUM_ITALIC_TAGS = sizeof(ITALIC_TAGS) / sizeof(ITALIC_TAGS[0]);
|
|
|
|
const char* IMAGE_TAGS[] = {"img"};
|
|
constexpr int NUM_IMAGE_TAGS = sizeof(IMAGE_TAGS) / sizeof(IMAGE_TAGS[0]);
|
|
|
|
const char* SKIP_TAGS[] = {"head", "table"};
|
|
constexpr int NUM_SKIP_TAGS = sizeof(SKIP_TAGS) / sizeof(SKIP_TAGS[0]);
|
|
|
|
// given the start and end of a tag, check to see if it matches a known tag
|
|
bool matches(const char* tag_name, const char* possible_tags[], const int possible_tag_count) {
|
|
for (int i = 0; i < possible_tag_count; i++) {
|
|
if (strcmp(tag_name, possible_tags[i]) == 0) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// start a new text block if needed
|
|
void EpubHtmlParser::startNewTextBlock(const BLOCK_STYLE style) {
|
|
if (currentTextBlock) {
|
|
// already have a text block running and it is empty - just reuse it
|
|
if (currentTextBlock->isEmpty()) {
|
|
currentTextBlock->set_style(style);
|
|
return;
|
|
}
|
|
|
|
currentTextBlock->finish();
|
|
makePages();
|
|
delete currentTextBlock;
|
|
}
|
|
currentTextBlock = new TextBlock(style);
|
|
}
|
|
|
|
bool EpubHtmlParser::VisitEnter(const tinyxml2::XMLElement& element, const tinyxml2::XMLAttribute* firstAttribute) {
|
|
const char* tag_name = element.Name();
|
|
if (matches(tag_name, IMAGE_TAGS, NUM_IMAGE_TAGS)) {
|
|
const char* src = element.Attribute("src");
|
|
if (src) {
|
|
// don't leave an empty text block in the list
|
|
// const BLOCK_STYLE style = currentTextBlock->get_style();
|
|
if (currentTextBlock->isEmpty()) {
|
|
delete currentTextBlock;
|
|
currentTextBlock = nullptr;
|
|
}
|
|
// TODO: Fix this
|
|
// blocks.push_back(new ImageBlock(m_base_path + src));
|
|
// start a new text block - with the same style as before
|
|
// startNewTextBlock(style);
|
|
} else {
|
|
// ESP_LOGE(TAG, "Could not find src attribute");
|
|
}
|
|
return false;
|
|
}
|
|
|
|
if (matches(tag_name, SKIP_TAGS, NUM_SKIP_TAGS)) {
|
|
return false;
|
|
}
|
|
|
|
// Serial.printf("Text: %s\n", element.GetText());
|
|
|
|
if (matches(tag_name, HEADER_TAGS, NUM_HEADER_TAGS)) {
|
|
insideBoldTag = true;
|
|
startNewTextBlock(CENTER_ALIGN);
|
|
} else if (matches(tag_name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
|
|
if (strcmp(tag_name, "br") == 0) {
|
|
startNewTextBlock(currentTextBlock->get_style());
|
|
} else {
|
|
startNewTextBlock(JUSTIFIED);
|
|
}
|
|
} else if (matches(tag_name, BOLD_TAGS, NUM_BOLD_TAGS)) {
|
|
insideBoldTag = true;
|
|
} else if (matches(tag_name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
|
|
insideItalicTag = true;
|
|
}
|
|
return true;
|
|
}
|
|
/// Visit a text node.
|
|
bool EpubHtmlParser::Visit(const tinyxml2::XMLText& text) {
|
|
const char* content = text.Value();
|
|
currentTextBlock->addSpan(replaceHtmlEntities(content), insideBoldTag, insideItalicTag);
|
|
return true;
|
|
}
|
|
|
|
bool EpubHtmlParser::VisitExit(const tinyxml2::XMLElement& element) {
|
|
const char* tag_name = element.Name();
|
|
if (matches(tag_name, HEADER_TAGS, NUM_HEADER_TAGS)) {
|
|
insideBoldTag = false;
|
|
} else if (matches(tag_name, BLOCK_TAGS, NUM_BLOCK_TAGS)) {
|
|
// nothing to do
|
|
} else if (matches(tag_name, BOLD_TAGS, NUM_BOLD_TAGS)) {
|
|
insideBoldTag = false;
|
|
} else if (matches(tag_name, ITALIC_TAGS, NUM_ITALIC_TAGS)) {
|
|
insideItalicTag = false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool EpubHtmlParser::parseAndBuildPages() {
|
|
startNewTextBlock(JUSTIFIED);
|
|
tinyxml2::XMLDocument doc(false, tinyxml2::COLLAPSE_WHITESPACE);
|
|
|
|
const tinyxml2::XMLError result = doc.LoadFile(filepath);
|
|
if (result != tinyxml2::XML_SUCCESS) {
|
|
Serial.printf("Failed to load file, Error: %s\n", tinyxml2::XMLDocument::ErrorIDToName(result));
|
|
return false;
|
|
}
|
|
|
|
doc.Accept(this);
|
|
if (currentTextBlock) {
|
|
makePages();
|
|
completePageFn(currentPage);
|
|
currentPage = nullptr;
|
|
delete currentTextBlock;
|
|
currentTextBlock = nullptr;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void EpubHtmlParser::makePages() {
|
|
if (!currentTextBlock) {
|
|
Serial.println("!! No text block to make pages for !!");
|
|
return;
|
|
}
|
|
|
|
if (!currentPage) {
|
|
currentPage = new Page();
|
|
}
|
|
|
|
const int lineHeight = renderer->getLineHeight();
|
|
const int pageHeight = renderer->getPageHeight();
|
|
|
|
// Long running task, make sure to let other things happen
|
|
vTaskDelay(1);
|
|
|
|
if (currentTextBlock->getType() == TEXT_BLOCK) {
|
|
const auto lines = currentTextBlock->splitIntoLines(renderer);
|
|
|
|
for (const auto line : lines) {
|
|
if (currentPage->nextY + lineHeight > pageHeight) {
|
|
completePageFn(currentPage);
|
|
currentPage = new Page();
|
|
}
|
|
|
|
currentPage->elements.push_back(new PageLine(line, currentPage->nextY));
|
|
currentPage->nextY += lineHeight;
|
|
}
|
|
// TODO: Fix spacing between paras
|
|
// add some extra line between blocks
|
|
currentPage->nextY += lineHeight / 2;
|
|
}
|
|
// TODO: Image block support
|
|
// if (block->getType() == BlockType::IMAGE_BLOCK) {
|
|
// ImageBlock *imageBlock = (ImageBlock *)block;
|
|
// if (y + imageBlock->height > page_height) {
|
|
// pages.push_back(new Page());
|
|
// y = 0;
|
|
// }
|
|
// pages.back()->elements.push_back(new PageImage(imageBlock, y));
|
|
// y += imageBlock->height;
|
|
// }
|
|
}
|