mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2025-12-18 23:27:44 +03:00
* Add expat and swap out ERB HTML parser * Increase EpubHtmlParserSlim file buffer to 1024 bytes * Cleanup TextBlock functions * Do not break words when leaving spans
99 lines
4.3 KiB
C
99 lines
4.3 KiB
C
/* This file is included!
|
|
__ __ _
|
|
___\ \/ /_ __ __ _| |_
|
|
/ _ \\ /| '_ \ / _` | __|
|
|
| __// \| |_) | (_| | |_
|
|
\___/_/\_\ .__/ \__,_|\__|
|
|
|_| XML parser
|
|
|
|
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
|
|
Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
|
|
Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
|
|
Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
|
|
Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
|
|
Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org>
|
|
Licensed under the MIT license:
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
a copy of this software and associated documentation files (the
|
|
"Software"), to deal in the Software without restriction, including
|
|
without limitation the rights to use, copy, modify, merge, publish,
|
|
distribute, sublicense, and/or sell copies of the Software, and to permit
|
|
persons to whom the Software is furnished to do so, subject to the
|
|
following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included
|
|
in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
|
|
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifdef XML_TOK_NS_C
|
|
|
|
/* Returns a pointer to the `enc` member of ns(internal_utf8_encoding). */
const ENCODING* NS(XmlGetUtf8InternalEncoding)(void) {
  const ENCODING* const utf8 = &ns(internal_utf8_encoding).enc;
  return utf8;
}
|
|
|
|
const ENCODING* NS(XmlGetUtf16InternalEncoding)(void) {
|
|
#if BYTEORDER == 1234
|
|
return &ns(internal_little2_encoding).enc;
|
|
#elif BYTEORDER == 4321
|
|
return &ns(internal_big2_encoding).enc;
|
|
#else
|
|
const short n = 1;
|
|
return (*(const char*)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc);
|
|
#endif
|
|
}
|
|
|
|
static const ENCODING* const NS(encodings)[] = {
|
|
&ns(latin1_encoding).enc, &ns(ascii_encoding).enc, &ns(utf8_encoding).enc, &ns(big2_encoding).enc,
|
|
&ns(big2_encoding).enc, &ns(little2_encoding).enc, &ns(utf8_encoding).enc /* NO_ENC */
|
|
};
|
|
|
|
/*
 * Scanner installed for XML_PROLOG_STATE by XmlInitEncoding(). Treats `enc` as
 * an INIT_ENCODING and forwards to initScan() with this variant's encoding
 * table; the result of initScan() is returned unchanged.
 */
static int PTRCALL NS(initScanProlog)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) {
  const INIT_ENCODING* const initEnc = (const INIT_ENCODING*)enc;
  return initScan(NS(encodings), initEnc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
}
|
|
|
|
/*
 * Scanner installed for XML_CONTENT_STATE by XmlInitEncoding(). Treats `enc`
 * as an INIT_ENCODING and forwards to initScan() with this variant's encoding
 * table; the result of initScan() is returned unchanged.
 */
static int PTRCALL NS(initScanContent)(const ENCODING* enc, const char* ptr, const char* end, const char** nextTokPtr) {
  const INIT_ENCODING* const initEnc = (const INIT_ENCODING*)enc;
  return initScan(NS(encodings), initEnc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
}
|
|
|
|
/*
 * Prepares the initial-encoding object `p` for the encoding called `name`.
 *
 * The name is resolved with getEncodingIndex(). If that yields UNKNOWN_ENC,
 * nothing is modified and 0 is returned. Otherwise the index is stored in `p`,
 * the prolog/content scanners and the position updater are installed,
 * `p->encPtr` is set to `encPtr`, `*encPtr` is pointed at `p->initEnc`, and 1
 * is returned.
 */
int NS(XmlInitEncoding)(INIT_ENCODING* p, const ENCODING** encPtr, const char* name) {
  const int encIndex = getEncodingIndex(name);

  if (encIndex != UNKNOWN_ENC) {
    SET_INIT_ENC_INDEX(p, encIndex);

    /* Install the callbacks of the initial encoding. */
    p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog);
    p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent);
    p->initEnc.updatePosition = initUpdatePosition;

    /* Remember the caller's encoding slot and point it at the initial encoding. */
    p->encPtr = encPtr;
    *encPtr = &(p->initEnc);
    return 1;
  }

  return 0;
}
|
|
|
|
static const ENCODING* NS(findEncoding)(const ENCODING* enc, const char* ptr, const char* end) {
|
|
#define ENCODING_MAX 128
|
|
char buf[ENCODING_MAX] = "";
|
|
char* p = buf;
|
|
int i;
|
|
XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
|
|
if (ptr != end) return 0;
|
|
*p = 0;
|
|
if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc;
|
|
i = getEncodingIndex(buf);
|
|
if (i == UNKNOWN_ENC) return 0;
|
|
return NS(encodings)[i];
|
|
}
|
|
|
|
int NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING* enc, const char* ptr, const char* end,
|
|
const char** badPtr, const char** versionPtr, const char** versionEndPtr,
|
|
const char** encodingName, const ENCODING** encoding, int* standalone) {
|
|
return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, versionEndPtr,
|
|
encodingName, encoding, standalone);
|
|
}
|
|
|
|
#endif /* XML_TOK_NS_C */
|