Skip pagebreak blocks when parsing epub file (#58)
Some checks are pending
CI / build (push) Waiting to run

## Summary

* Skip pagebreak blocks when parsing epub file
* These blocks break the flow and often contain the page number in them
which should not interrupt the flow of the content
- Attributes sourced from:
  - https://www.w3.org/TR/epub-ssv-11/#pagebreak
  - https://www.w3.org/TR/dpub-aria-1.1/#doc-pagebreak
This commit is contained in:
Dave Allie 2025-12-19 01:11:03 +11:00 committed by GitHub
parent 70dc0f018e
commit b2020f5512
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -75,6 +75,18 @@ void XMLCALL ChapterHtmlSlimParser::startElement(void* userData, const XML_Char*
return; return;
} }
// Skip blocks with role="doc-pagebreak" and epub:type="pagebreak"
if (atts != nullptr) {
for (int i = 0; atts[i]; i += 2) {
if (strcmp(atts[i], "role") == 0 && strcmp(atts[i + 1], "doc-pagebreak") == 0 ||
strcmp(atts[i], "epub:type") == 0 && strcmp(atts[i + 1], "pagebreak") == 0) {
self->skipUntilDepth = self->depth;
self->depth += 1;
return;
}
}
}
if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) { if (matches(name, HEADER_TAGS, NUM_HEADER_TAGS)) {
self->startNewTextBlock(TextBlock::CENTER_ALIGN); self->startNewTextBlock(TextBlock::CENTER_ALIGN);
self->boldUntilDepth = min(self->boldUntilDepth, self->depth); self->boldUntilDepth = min(self->boldUntilDepth, self->depth);