diff options
Diffstat (limited to 'school/node_modules/saxes/saxes.js')
-rw-r--r-- | school/node_modules/saxes/saxes.js | 2064 |
1 files changed, 2064 insertions, 0 deletions
diff --git a/school/node_modules/saxes/saxes.js b/school/node_modules/saxes/saxes.js new file mode 100644 index 0000000..0436871 --- /dev/null +++ b/school/node_modules/saxes/saxes.js @@ -0,0 +1,2064 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const ed5 = require("xmlchars/xml/1.0/ed5"); +const ed2 = require("xmlchars/xml/1.1/ed2"); +const NSed3 = require("xmlchars/xmlns/1.0/ed3"); +var isS = ed5.isS; +var isChar10 = ed5.isChar; +var isNameStartChar = ed5.isNameStartChar; +var isNameChar = ed5.isNameChar; +var S_LIST = ed5.S_LIST; +var NAME_RE = ed5.NAME_RE; +var isChar11 = ed2.isChar; +var isNCNameStartChar = NSed3.isNCNameStartChar; +var isNCNameChar = NSed3.isNCNameChar; +var NC_NAME_RE = NSed3.NC_NAME_RE; +const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"; +const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"; +const rootNS = { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + __proto__: null, + xml: XML_NAMESPACE, + xmlns: XMLNS_NAMESPACE, +}; +const XML_ENTITIES = { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + __proto__: null, + amp: "&", + gt: ">", + lt: "<", + quot: "\"", + apos: "'", +}; +// EOC: end-of-chunk +const EOC = -1; +const NL_LIKE = -2; +const S_BEGIN = 0; // Initial state. +const S_BEGIN_WHITESPACE = 1; // leading whitespace +const S_DOCTYPE = 2; // <!DOCTYPE +const S_DOCTYPE_QUOTE = 3; // <!DOCTYPE "//blah +const S_DTD = 4; // <!DOCTYPE "//blah" [ ... +const S_DTD_QUOTED = 5; // <!DOCTYPE "//blah" [ "foo +const S_DTD_OPEN_WAKA = 6; +const S_DTD_OPEN_WAKA_BANG = 7; +const S_DTD_COMMENT = 8; // <!-- +const S_DTD_COMMENT_ENDING = 9; // <!-- blah - +const S_DTD_COMMENT_ENDED = 10; // <!-- blah -- +const S_DTD_PI = 11; // <? +const S_DTD_PI_ENDING = 12; // <?hi "there" ? +const S_TEXT = 13; // general stuff +const S_ENTITY = 14; // & and such +const S_OPEN_WAKA = 15; // < +const S_OPEN_WAKA_BANG = 16; // <!... +const S_COMMENT = 17; // <!-- +const S_COMMENT_ENDING = 18; // <!-- blah - +const S_COMMENT_ENDED = 19; // <!-- blah -- +const S_CDATA = 20; // <![CDATA[ something +const S_CDATA_ENDING = 21; // ] +const S_CDATA_ENDING_2 = 22; // ]] +const S_PI_FIRST_CHAR = 23; // <?hi, first char +const S_PI_REST = 24; // <?hi, rest of the name +const S_PI_BODY = 25; // <?hi there +const S_PI_ENDING = 26; // <?hi "there" ? +const S_XML_DECL_NAME_START = 27; // <?xml +const S_XML_DECL_NAME = 28; // <?xml foo +const S_XML_DECL_EQ = 29; // <?xml foo= +const S_XML_DECL_VALUE_START = 30; // <?xml foo= +const S_XML_DECL_VALUE = 31; // <?xml foo="bar" +const S_XML_DECL_SEPARATOR = 32; // <?xml foo="bar" +const S_XML_DECL_ENDING = 33; // <?xml ... ? +const S_OPEN_TAG = 34; // <strong +const S_OPEN_TAG_SLASH = 35; // <strong / +const S_ATTRIB = 36; // <a +const S_ATTRIB_NAME = 37; // <a foo +const S_ATTRIB_NAME_SAW_WHITE = 38; // <a foo _ +const S_ATTRIB_VALUE = 39; // <a foo= +const S_ATTRIB_VALUE_QUOTED = 40; // <a foo="bar +const S_ATTRIB_VALUE_CLOSED = 41; // <a foo="bar" +const S_ATTRIB_VALUE_UNQUOTED = 42; // <a foo=bar +const S_CLOSE_TAG = 43; // </a +const S_CLOSE_TAG_SAW_WHITE = 44; // </a > +const TAB = 9; +const NL = 0xA; +const CR = 0xD; +const SPACE = 0x20; +const BANG = 0x21; +const DQUOTE = 0x22; +const AMP = 0x26; +const SQUOTE = 0x27; +const MINUS = 0x2D; +const FORWARD_SLASH = 0x2F; +const SEMICOLON = 0x3B; +const LESS = 0x3C; +const EQUAL = 0x3D; +const GREATER = 0x3E; +const QUESTION = 0x3F; +const OPEN_BRACKET = 0x5B; +const CLOSE_BRACKET = 0x5D; +const NEL = 0x85; +const LS = 0x2028; // Line Separator +const isQuote = (c) => c === DQUOTE || c === SQUOTE; +const QUOTES = [DQUOTE, SQUOTE]; +const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER]; +const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET]; +const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST]; +const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS]; +function nsPairCheck(parser, prefix, uri) { + switch (prefix) { + case "xml": + if (uri !== XML_NAMESPACE) { + parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`); + } + break; + case "xmlns": + if (uri !== XMLNS_NAMESPACE) { + parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`); + } + break; + default: + } + switch (uri) { + case XMLNS_NAMESPACE: + parser.fail(prefix === "" ? + `the default namespace may not be set to ${uri}.` : + `may not assign a prefix (even "xmlns") to the URI \ +${XMLNS_NAMESPACE}.`); + break; + case XML_NAMESPACE: + switch (prefix) { + case "xml": + // Assinging the XML namespace to "xml" is fine. + break; + case "": + parser.fail(`the default namespace may not be set to ${uri}.`); + break; + default: + parser.fail("may not assign the xml namespace to another prefix."); + } + break; + default: + } +} +function nsMappingCheck(parser, mapping) { + for (const local of Object.keys(mapping)) { + nsPairCheck(parser, local, mapping[local]); + } +} +const isNCName = (name) => NC_NAME_RE.test(name); +const isName = (name) => NAME_RE.test(name); +const FORBIDDEN_START = 0; +const FORBIDDEN_BRACKET = 1; +const FORBIDDEN_BRACKET_BRACKET = 2; +/** + * The list of supported events. + */ +exports.EVENTS = [ + "xmldecl", + "text", + "processinginstruction", + "doctype", + "comment", + "opentagstart", + "attribute", + "opentag", + "closetag", + "cdata", + "error", + "end", + "ready", +]; +const EVENT_NAME_TO_HANDLER_NAME = { + xmldecl: "xmldeclHandler", + text: "textHandler", + processinginstruction: "piHandler", + doctype: "doctypeHandler", + comment: "commentHandler", + opentagstart: "openTagStartHandler", + attribute: "attributeHandler", + opentag: "openTagHandler", + closetag: "closeTagHandler", + cdata: "cdataHandler", + error: "errorHandler", + end: "endHandler", + ready: "readyHandler", +}; +class SaxesParser { + /** + * @param opt The parser options. + */ + constructor(opt) { + this.opt = opt !== null && opt !== void 0 ? opt : {}; + this.fragmentOpt = !!this.opt.fragment; + const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns; + this.trackPosition = this.opt.position !== false; + this.fileName = this.opt.fileName; + if (xmlnsOpt) { + // This is the function we use to perform name checks on PIs and entities. + // When namespaces are used, colons are not allowed in PI target names or + // entity names. So the check depends on whether namespaces are used. See: + // + // https://www.w3.org/XML/xml-names-19990114-errata.html + // NE08 + // + this.nameStartCheck = isNCNameStartChar; + this.nameCheck = isNCNameChar; + this.isName = isNCName; + // eslint-disable-next-line @typescript-eslint/unbound-method + this.processAttribs = this.processAttribsNS; + // eslint-disable-next-line @typescript-eslint/unbound-method + this.pushAttrib = this.pushAttribNS; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + this.ns = Object.assign({ __proto__: null }, rootNS); + const additional = this.opt.additionalNamespaces; + if (additional != null) { + nsMappingCheck(this, additional); + Object.assign(this.ns, additional); + } + } + else { + this.nameStartCheck = isNameStartChar; + this.nameCheck = isNameChar; + this.isName = isName; + // eslint-disable-next-line @typescript-eslint/unbound-method + this.processAttribs = this.processAttribsPlain; + // eslint-disable-next-line @typescript-eslint/unbound-method + this.pushAttrib = this.pushAttribPlain; + } + // + // The order of the members in this table needs to correspond to the state + // numbers given to the states that correspond to the methods being recorded + // here. + // + this.stateTable = [ + /* eslint-disable @typescript-eslint/unbound-method */ + this.sBegin, + this.sBeginWhitespace, + this.sDoctype, + this.sDoctypeQuote, + this.sDTD, + this.sDTDQuoted, + this.sDTDOpenWaka, + this.sDTDOpenWakaBang, + this.sDTDComment, + this.sDTDCommentEnding, + this.sDTDCommentEnded, + this.sDTDPI, + this.sDTDPIEnding, + this.sText, + this.sEntity, + this.sOpenWaka, + this.sOpenWakaBang, + this.sComment, + this.sCommentEnding, + this.sCommentEnded, + this.sCData, + this.sCDataEnding, + this.sCDataEnding2, + this.sPIFirstChar, + this.sPIRest, + this.sPIBody, + this.sPIEnding, + this.sXMLDeclNameStart, + this.sXMLDeclName, + this.sXMLDeclEq, + this.sXMLDeclValueStart, + this.sXMLDeclValue, + this.sXMLDeclSeparator, + this.sXMLDeclEnding, + this.sOpenTag, + this.sOpenTagSlash, + this.sAttrib, + this.sAttribName, + this.sAttribNameSawWhite, + this.sAttribValue, + this.sAttribValueQuoted, + this.sAttribValueClosed, + this.sAttribValueUnquoted, + this.sCloseTag, + this.sCloseTagSawWhite, + ]; + this._init(); + } + /** + * Indicates whether or not the parser is closed. If ``true``, wait for + * the ``ready`` event to write again. + */ + get closed() { + return this._closed; + } + _init() { + var _a; + this.openWakaBang = ""; + this.text = ""; + this.name = ""; + this.piTarget = ""; + this.entity = ""; + this.q = null; + this.tags = []; + this.tag = null; + this.topNS = null; + this.chunk = ""; + this.chunkPosition = 0; + this.i = 0; + this.prevI = 0; + this.carriedFromPrevious = undefined; + this.forbiddenState = FORBIDDEN_START; + this.attribList = []; + // The logic is organized so as to minimize the need to check + // this.opt.fragment while parsing. + const { fragmentOpt } = this; + this.state = fragmentOpt ? S_TEXT : S_BEGIN; + // We want these to be all true if we are dealing with a fragment. + this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot = + this.sawRoot = fragmentOpt; + // An XML declaration is intially possible only when parsing whole + // documents. + this.xmlDeclPossible = !fragmentOpt; + this.xmlDeclExpects = ["version"]; + this.entityReturnState = undefined; + let { defaultXMLVersion } = this.opt; + if (defaultXMLVersion === undefined) { + if (this.opt.forceXMLVersion === true) { + throw new Error("forceXMLVersion set but defaultXMLVersion is not set"); + } + defaultXMLVersion = "1.0"; + } + this.setXMLVersion(defaultXMLVersion); + this.positionAtNewLine = 0; + this.doctype = false; + this._closed = false; + this.xmlDecl = { + version: undefined, + encoding: undefined, + standalone: undefined, + }; + this.line = 1; + this.column = 0; + this.ENTITIES = Object.create(XML_ENTITIES); + // eslint-disable-next-line no-unused-expressions + (_a = this.readyHandler) === null || _a === void 0 ? void 0 : _a.call(this); + } + /** + * The stream position the parser is currently looking at. This field is + * zero-based. + * + * This field is not based on counting Unicode characters but is to be + * interpreted as a plain index into a JavaScript string. + */ + get position() { + return this.chunkPosition + this.i; + } + /** + * The column number of the next character to be read by the parser. * + * This field is zero-based. (The first column in a line is 0.) + * + * This field reports the index at which the next character would be in the + * line if the line were represented as a JavaScript string. Note that this + * *can* be different to a count based on the number of *Unicode characters* + * due to how JavaScript handles astral plane characters. + * + * See [[column]] for a number that corresponds to a count of Unicode + * characters. + */ + get columnIndex() { + return this.position - this.positionAtNewLine; + } + /** + * Set an event listener on an event. The parser supports one handler per + * event type. If you try to set an event handler over an existing handler, + * the old handler is silently overwritten. + * + * @param name The event to listen to. + * + * @param handler The handler to set. + */ + on(name, handler) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler; + } + /** + * Unset an event handler. + * + * @parma name The event to stop listening to. + */ + off(name) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + this[EVENT_NAME_TO_HANDLER_NAME[name]] = undefined; + } + /** + * Make an error object. The error object will have a message that contains + * the ``fileName`` option passed at the creation of the parser. If position + * tracking was turned on, it will also have line and column number + * information. + * + * @param message The message describing the error to report. + * + * @returns An error object with a properly formatted message. + */ + makeError(message) { + var _a; + let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : ""; + if (this.trackPosition) { + if (msg.length > 0) { + msg += ":"; + } + msg += `${this.line}:${this.column}`; + } + if (msg.length > 0) { + msg += ": "; + } + return new Error(msg + message); + } + /** + * Report a parsing error. This method is made public so that client code may + * check for issues that are outside the scope of this project and can report + * errors. + * + * @param message The error to report. + * + * @returns this + */ + fail(message) { + const err = this.makeError(message); + const handler = this.errorHandler; + if (handler === undefined) { + throw err; + } + else { + handler(err); + } + return this; + } + /** + * Write a XML data to the parser. + * + * @param chunk The XML data to write. + * + * @returns this + */ + write(chunk) { + if (this.closed) { + return this.fail("cannot write after close; assign an onready handler."); + } + let end = false; + if (chunk === null) { + // We cannot return immediately because carriedFromPrevious may need + // processing. + end = true; + chunk = ""; + } + else if (typeof chunk === "object") { + chunk = chunk.toString(); + } + // We checked if performing a pre-decomposition of the string into an array + // of single complete characters (``Array.from(chunk)``) would be faster + // than the current repeated calls to ``charCodeAt``. As of August 2018, it + // isn't. (There may be Node-specific code that would perform faster than + // ``Array.from`` but don't want to be dependent on Node.) + if (this.carriedFromPrevious !== undefined) { + // The previous chunk had char we must carry over. + chunk = `${this.carriedFromPrevious}${chunk}`; + this.carriedFromPrevious = undefined; + } + let limit = chunk.length; + const lastCode = chunk.charCodeAt(limit - 1); + if (!end && + // A trailing CR or surrogate must be carried over to the next + // chunk. + (lastCode === CR || (lastCode >= 0xD800 && lastCode <= 0xDBFF))) { + // The chunk ends with a character that must be carried over. We cannot + // know how to handle it until we get the next chunk or the end of the + // stream. So save it for later. + this.carriedFromPrevious = chunk[limit - 1]; + limit--; + chunk = chunk.slice(0, limit); + } + const { stateTable } = this; + this.chunk = chunk; + this.i = 0; + while (this.i < limit) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + stateTable[this.state].call(this); + } + this.chunkPosition += limit; + return end ? this.end() : this; + } + /** + * Close the current stream. Perform final well-formedness checks and reset + * the parser tstate. + * + * @returns this + */ + close() { + return this.write(null); + } + /** + * Get a single code point out of the current chunk. This updates the current + * position if we do position tracking. + * + * This is the algorithm to use for XML 1.0. + * + * @returns The character read. + */ + getCode10() { + const { chunk, i } = this; + this.prevI = i; + // Yes, we do this instead of doing this.i++. Doing it this way, we do not + // read this.i again, which is a bit faster. + this.i = i + 1; + if (i >= chunk.length) { + return EOC; + } + // Using charCodeAt and handling the surrogates ourselves is faster + // than using codePointAt. + const code = chunk.charCodeAt(i); + this.column++; + if (code < 0xD800) { + if (code >= SPACE || code === TAB) { + return code; + } + switch (code) { + case NL: + this.line++; + this.column = 0; + this.positionAtNewLine = this.position; + return NL; + case CR: + // We may get NaN if we read past the end of the chunk, which is fine. + if (chunk.charCodeAt(i + 1) === NL) { + // A \r\n sequence is converted to \n so we have to skip over the + // next character. We already know it has a size of 1 so ++ is fine + // here. + this.i = i + 2; + } + // Otherwise, a \r is just converted to \n, so we don't have to skip + // ahead. + // In either case, \r becomes \n. + this.line++; + this.column = 0; + this.positionAtNewLine = this.position; + return NL_LIKE; + default: + // If we get here, then code < SPACE and it is not NL CR or TAB. + this.fail("disallowed character."); + return code; + } + } + if (code > 0xDBFF) { + // This is a specialized version of isChar10 that takes into account + // that in this context code > 0xDBFF and code <= 0xFFFF. So it does not + // test cases that don't need testing. + if (!(code >= 0xE000 && code <= 0xFFFD)) { + this.fail("disallowed character."); + } + return code; + } + const final = 0x10000 + ((code - 0xD800) * 0x400) + + (chunk.charCodeAt(i + 1) - 0xDC00); + this.i = i + 2; + // This is a specialized version of isChar10 that takes into account that in + // this context necessarily final >= 0x10000. + if (final > 0x10FFFF) { + this.fail("disallowed character."); + } + return final; + } + /** + * Get a single code point out of the current chunk. This updates the current + * position if we do position tracking. + * + * This is the algorithm to use for XML 1.1. + * + * @returns {number} The character read. + */ + getCode11() { + const { chunk, i } = this; + this.prevI = i; + // Yes, we do this instead of doing this.i++. Doing it this way, we do not + // read this.i again, which is a bit faster. + this.i = i + 1; + if (i >= chunk.length) { + return EOC; + } + // Using charCodeAt and handling the surrogates ourselves is faster + // than using codePointAt. + const code = chunk.charCodeAt(i); + this.column++; + if (code < 0xD800) { + if ((code > 0x1F && code < 0x7F) || (code > 0x9F && code !== LS) || + code === TAB) { + return code; + } + switch (code) { + case NL: // 0xA + this.line++; + this.column = 0; + this.positionAtNewLine = this.position; + return NL; + case CR: { // 0xD + // We may get NaN if we read past the end of the chunk, which is + // fine. + const next = chunk.charCodeAt(i + 1); + if (next === NL || next === NEL) { + // A CR NL or CR NEL sequence is converted to NL so we have to skip + // over the next character. We already know it has a size of 1. + this.i = i + 2; + } + // Otherwise, a CR is just converted to NL, no skip. + } + /* yes, fall through */ + case NEL: // 0x85 + case LS: // Ox2028 + this.line++; + this.column = 0; + this.positionAtNewLine = this.position; + return NL_LIKE; + default: + this.fail("disallowed character."); + return code; + } + } + if (code > 0xDBFF) { + // This is a specialized version of isCharAndNotRestricted that takes into + // account that in this context code > 0xDBFF and code <= 0xFFFF. So it + // does not test cases that don't need testing. + if (!(code >= 0xE000 && code <= 0xFFFD)) { + this.fail("disallowed character."); + } + return code; + } + const final = 0x10000 + ((code - 0xD800) * 0x400) + + (chunk.charCodeAt(i + 1) - 0xDC00); + this.i = i + 2; + // This is a specialized version of isCharAndNotRestricted that takes into + // account that in this context necessarily final >= 0x10000. + if (final > 0x10FFFF) { + this.fail("disallowed character."); + } + return final; + } + /** + * Like ``getCode`` but with the return value normalized so that ``NL`` is + * returned for ``NL_LIKE``. + */ + getCodeNorm() { + const c = this.getCode(); + return c === NL_LIKE ? NL : c; + } + unget() { + this.i = this.prevI; + this.column--; + } + /** + * Capture characters into a buffer until encountering one of a set of + * characters. + * + * @param chars An array of codepoints. Encountering a character in the array + * ends the capture. (``chars`` may safely contain ``NL``.) + * + * @return The character code that made the capture end, or ``EOC`` if we hit + * the end of the chunk. The return value cannot be NL_LIKE: NL is returned + * instead. + */ + captureTo(chars) { + let { i: start } = this; + const { chunk } = this; + // eslint-disable-next-line no-constant-condition + while (true) { + const c = this.getCode(); + const isNLLike = c === NL_LIKE; + const final = isNLLike ? NL : c; + if (final === EOC || chars.includes(final)) { + this.text += chunk.slice(start, this.prevI); + return final; + } + if (isNLLike) { + this.text += `${chunk.slice(start, this.prevI)}\n`; + start = this.i; + } + } + } + /** + * Capture characters into a buffer until encountering a character. + * + * @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT + * CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior. + * + * @return ``true`` if we ran into the character. Otherwise, we ran into the + * end of the current chunk. + */ + captureToChar(char) { + let { i: start } = this; + const { chunk } = this; + // eslint-disable-next-line no-constant-condition + while (true) { + let c = this.getCode(); + switch (c) { + case NL_LIKE: + this.text += `${chunk.slice(start, this.prevI)}\n`; + start = this.i; + c = NL; + break; + case EOC: + this.text += chunk.slice(start); + return false; + default: + } + if (c === char) { + this.text += chunk.slice(start, this.prevI); + return true; + } + } + } + /** + * Capture characters that satisfy ``isNameChar`` into the ``name`` field of + * this parser. + * + * @return The character code that made the test fail, or ``EOC`` if we hit + * the end of the chunk. The return value cannot be NL_LIKE: NL is returned + * instead. + */ + captureNameChars() { + const { chunk, i: start } = this; + // eslint-disable-next-line no-constant-condition + while (true) { + const c = this.getCode(); + if (c === EOC) { + this.name += chunk.slice(start); + return EOC; + } + // NL is not a name char so we don't have to test specifically for it. + if (!isNameChar(c)) { + this.name += chunk.slice(start, this.prevI); + return c === NL_LIKE ? NL : c; + } + } + } + /** + * Skip white spaces. + * + * @return The character that ended the skip, or ``EOC`` if we hit + * the end of the chunk. The return value cannot be NL_LIKE: NL is returned + * instead. + */ + skipSpaces() { + // eslint-disable-next-line no-constant-condition + while (true) { + const c = this.getCodeNorm(); + if (c === EOC || !isS(c)) { + return c; + } + } + } + setXMLVersion(version) { + this.currentXMLVersion = version; + /* eslint-disable @typescript-eslint/unbound-method */ + if (version === "1.0") { + this.isChar = isChar10; + this.getCode = this.getCode10; + } + else { + this.isChar = isChar11; + this.getCode = this.getCode11; + } + /* eslint-enable @typescript-eslint/unbound-method */ + } + // STATE ENGINE METHODS + // This needs to be a state separate from S_BEGIN_WHITESPACE because we want + // to be sure never to come back to this state later. + sBegin() { + // We are essentially peeking at the first character of the chunk. Since + // S_BEGIN can be in effect only when we start working on the first chunk, + // the index at which we must look is necessarily 0. Note also that the + // following test does not depend on decoding surrogates. + // If the initial character is 0xFEFF, ignore it. + if (this.chunk.charCodeAt(0) === 0xFEFF) { + this.i++; + this.column++; + } + this.state = S_BEGIN_WHITESPACE; + } + sBeginWhitespace() { + // We need to know whether we've encountered spaces or not because as soon + // as we run into a space, an XML declaration is no longer possible. Rather + // than slow down skipSpaces even in places where we don't care whether it + // skipped anything or not, we check whether prevI is equal to the value of + // i from before we skip spaces. + const iBefore = this.i; + const c = this.skipSpaces(); + if (this.prevI !== iBefore) { + this.xmlDeclPossible = false; + } + switch (c) { + case LESS: + this.state = S_OPEN_WAKA; + // We could naively call closeText but in this state, it is not normal + // to have text be filled with any data. + if (this.text.length !== 0) { + throw new Error("no-empty text at start"); + } + break; + case EOC: + break; + default: + this.unget(); + this.state = S_TEXT; + this.xmlDeclPossible = false; + } + } + sDoctype() { + var _a; + const c = this.captureTo(DOCTYPE_TERMINATOR); + switch (c) { + case GREATER: { + // eslint-disable-next-line no-unused-expressions + (_a = this.doctypeHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text); + this.text = ""; + this.state = S_TEXT; + this.doctype = true; // just remember that we saw it. + break; + } + case EOC: + break; + default: + this.text += String.fromCodePoint(c); + if (c === OPEN_BRACKET) { + this.state = S_DTD; + } + else if (isQuote(c)) { + this.state = S_DOCTYPE_QUOTE; + this.q = c; + } + } + } + sDoctypeQuote() { + const q = this.q; + if (this.captureToChar(q)) { + this.text += String.fromCodePoint(q); + this.q = null; + this.state = S_DOCTYPE; + } + } + sDTD() { + const c = this.captureTo(DTD_TERMINATOR); + if (c === EOC) { + return; + } + this.text += String.fromCodePoint(c); + if (c === CLOSE_BRACKET) { + this.state = S_DOCTYPE; + } + else if (c === LESS) { + this.state = S_DTD_OPEN_WAKA; + } + else if (isQuote(c)) { + this.state = S_DTD_QUOTED; + this.q = c; + } + } + sDTDQuoted() { + const q = this.q; + if (this.captureToChar(q)) { + this.text += String.fromCodePoint(q); + this.state = S_DTD; + this.q = null; + } + } + sDTDOpenWaka() { + const c = this.getCodeNorm(); + this.text += String.fromCodePoint(c); + switch (c) { + case BANG: + this.state = S_DTD_OPEN_WAKA_BANG; + this.openWakaBang = ""; + break; + case QUESTION: + this.state = S_DTD_PI; + break; + default: + this.state = S_DTD; + } + } + sDTDOpenWakaBang() { + const char = String.fromCodePoint(this.getCodeNorm()); + const owb = this.openWakaBang += char; + this.text += char; + if (owb !== "-") { + this.state = owb === "--" ? S_DTD_COMMENT : S_DTD; + this.openWakaBang = ""; + } + } + sDTDComment() { + if (this.captureToChar(MINUS)) { + this.text += "-"; + this.state = S_DTD_COMMENT_ENDING; + } + } + sDTDCommentEnding() { + const c = this.getCodeNorm(); + this.text += String.fromCodePoint(c); + this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT; + } + sDTDCommentEnded() { + const c = this.getCodeNorm(); + this.text += String.fromCodePoint(c); + if (c === GREATER) { + this.state = S_DTD; + } + else { + this.fail("malformed comment."); + // <!-- blah -- bloo --> will be recorded as + // a comment of " blah -- bloo " + this.state = S_DTD_COMMENT; + } + } + sDTDPI() { + if (this.captureToChar(QUESTION)) { + this.text += "?"; + this.state = S_DTD_PI_ENDING; + } + } + sDTDPIEnding() { + const c = this.getCodeNorm(); + this.text += String.fromCodePoint(c); + if (c === GREATER) { + this.state = S_DTD; + } + } + sText() { + // + // We did try a version of saxes where the S_TEXT state was split in two + // states: one for text inside the root element, and one for text + // outside. This was avoiding having to test this.tags.length to decide + // what implementation to actually use. + // + // Peformance testing on gigabyte-size files did not show any advantage to + // using the two states solution instead of the current one. Conversely, it + // made the code a bit more complicated elsewhere. For instance, a comment + // can appear before the root element so when a comment ended it was + // necessary to determine whether to return to the S_TEXT state or to the + // new text-outside-root state. + // + if (this.tags.length !== 0) { + this.handleTextInRoot(); + } + else { + this.handleTextOutsideRoot(); + } + } + sEntity() { + // This is essentially a specialized version of captureToChar(SEMICOLON...) + let { i: start } = this; + const { chunk } = this; + // eslint-disable-next-line no-labels, no-restricted-syntax + loop: + // eslint-disable-next-line no-constant-condition + while (true) { + switch (this.getCode()) { + case NL_LIKE: + this.entity += `${chunk.slice(start, this.prevI)}\n`; + start = this.i; + break; + case SEMICOLON: { + const { entityReturnState } = this; + const entity = this.entity + chunk.slice(start, this.prevI); + this.state = entityReturnState; + let parsed; + if (entity === "") { + this.fail("empty entity name."); + parsed = "&;"; + } + else { + parsed = this.parseEntity(entity); + this.entity = ""; + } + if (entityReturnState !== S_TEXT || this.textHandler !== undefined) { + this.text += parsed; + } + // eslint-disable-next-line no-labels + break loop; + } + case EOC: + this.entity += chunk.slice(start); + // eslint-disable-next-line no-labels + break loop; + default: + } + } + } + sOpenWaka() { + // Reminder: a state handler is called with at least one character + // available in the current chunk. So the first call to get code inside of + // a state handler cannot return ``EOC``. That's why we don't test + // for it. + const c = this.getCode(); + // either a /, ?, !, or text is coming next. + if (isNameStartChar(c)) { + this.state = S_OPEN_TAG; + this.unget(); + this.xmlDeclPossible = false; + } + else { + switch (c) { + case FORWARD_SLASH: + this.state = S_CLOSE_TAG; + this.xmlDeclPossible = false; + break; + case BANG: + this.state = S_OPEN_WAKA_BANG; + this.openWakaBang = ""; + this.xmlDeclPossible = false; + break; + case QUESTION: + this.state = S_PI_FIRST_CHAR; + break; + default: + this.fail("disallowed character in tag name"); + this.state = S_TEXT; + this.xmlDeclPossible = false; + } + } + } + sOpenWakaBang() { + this.openWakaBang += String.fromCodePoint(this.getCodeNorm()); + switch (this.openWakaBang) { + case "[CDATA[": + if (!this.sawRoot && !this.reportedTextBeforeRoot) { + this.fail("text data outside of root node."); + this.reportedTextBeforeRoot = true; + } + if (this.closedRoot && !this.reportedTextAfterRoot) { + this.fail("text data outside of root node."); + this.reportedTextAfterRoot = true; + } + this.state = S_CDATA; + this.openWakaBang = ""; + break; + case "--": + this.state = S_COMMENT; + this.openWakaBang = ""; + break; + case "DOCTYPE": + this.state = S_DOCTYPE; + if (this.doctype || this.sawRoot) { + this.fail("inappropriately located doctype declaration."); + } + this.openWakaBang = ""; + break; + default: + // 7 happens to be the maximum length of the string that can possibly + // match one of the cases above. + if (this.openWakaBang.length >= 7) { + this.fail("incorrect syntax."); + } + } + } + sComment() { + if (this.captureToChar(MINUS)) { + this.state = S_COMMENT_ENDING; + } + } + sCommentEnding() { + var _a; + const c = this.getCodeNorm(); + if (c === MINUS) { + this.state = S_COMMENT_ENDED; + // eslint-disable-next-line no-unused-expressions + (_a = this.commentHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text); + this.text = ""; + } + else { + this.text += `-${String.fromCodePoint(c)}`; + this.state = S_COMMENT; + } + } + sCommentEnded() { + const c = this.getCodeNorm(); + if (c !== GREATER) { + this.fail("malformed comment."); + // <!-- blah -- bloo --> will be recorded as + // a comment of " blah -- bloo " + this.text += `--${String.fromCodePoint(c)}`; + this.state = S_COMMENT; + } + else { + this.state = S_TEXT; + } + } + sCData() { + if (this.captureToChar(CLOSE_BRACKET)) { + this.state = S_CDATA_ENDING; + } + } + sCDataEnding() { + const c = this.getCodeNorm(); + if (c === CLOSE_BRACKET) { + this.state = S_CDATA_ENDING_2; + } + else { + this.text += `]${String.fromCodePoint(c)}`; + this.state = S_CDATA; + } + } + sCDataEnding2() { + var _a; + const c = this.getCodeNorm(); + switch (c) { + case GREATER: { + // eslint-disable-next-line no-unused-expressions + (_a = this.cdataHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text); + this.text = ""; + this.state = S_TEXT; + break; + } + case CLOSE_BRACKET: + this.text += "]"; + break; + default: + this.text += `]]${String.fromCodePoint(c)}`; + this.state = S_CDATA; + } + } + // We need this separate state to check the first character fo the pi target + // with this.nameStartCheck which allows less characters than this.nameCheck. + sPIFirstChar() { + const c = this.getCodeNorm(); + // This is first because in the case where the file is well-formed this is + // the branch taken. We optimize for well-formedness. + if (this.nameStartCheck(c)) { + this.piTarget += String.fromCodePoint(c); + this.state = S_PI_REST; + } + else if (c === QUESTION || isS(c)) { + this.fail("processing instruction without a target."); + this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY; + } + else { + this.fail("disallowed character in processing instruction name."); + this.piTarget += String.fromCodePoint(c); + this.state = S_PI_REST; + } + } + sPIRest() { + // Capture characters into a piTarget while ``this.nameCheck`` run on the + // character read returns true. + const { chunk, i: start } = this; + // eslint-disable-next-line no-constant-condition + while (true) { + const c = this.getCodeNorm(); + if (c === EOC) { + this.piTarget += chunk.slice(start); + return; + } + // NL cannot satisfy this.nameCheck so we don't have to test specifically + // for it. + if (!this.nameCheck(c)) { + this.piTarget += chunk.slice(start, this.prevI); + const isQuestion = c === QUESTION; + if (isQuestion || isS(c)) { + if (this.piTarget === "xml") { + if (!this.xmlDeclPossible) { + this.fail("an XML declaration must be at the start of the document."); + } + this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START; + } + else { + this.state = isQuestion ? S_PI_ENDING : S_PI_BODY; + } + } + else { + this.fail("disallowed character in processing instruction name."); + this.piTarget += String.fromCodePoint(c); + } + break; + } + } + } + sPIBody() { + if (this.text.length === 0) { + const c = this.getCodeNorm(); + if (c === QUESTION) { + this.state = S_PI_ENDING; + } + else if (!isS(c)) { + this.text = String.fromCodePoint(c); + } + } + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + else if (this.captureToChar(QUESTION)) { + this.state = S_PI_ENDING; + } + } + sPIEnding() { + var _a; + const c = this.getCodeNorm(); + if (c === GREATER) { + const { piTarget } = this; + if (piTarget.toLowerCase() === "xml") { + this.fail("the XML declaration must appear at the start of the document."); + } + // eslint-disable-next-line no-unused-expressions + (_a = this.piHandler) === null || _a === void 0 ? void 0 : _a.call(this, { + target: piTarget, + body: this.text, + }); + this.piTarget = this.text = ""; + this.state = S_TEXT; + } + else if (c === QUESTION) { + // We ran into ?? as part of a processing instruction. We initially took + // the first ? as a sign that the PI was ending, but it is not. So we have + // to add it to the body but we take the new ? as a sign that the PI is + // ending. + this.text += "?"; + } + else { + this.text += `?${String.fromCodePoint(c)}`; + this.state = S_PI_BODY; + } + this.xmlDeclPossible = false; + } + sXMLDeclNameStart() { + const c = this.skipSpaces(); + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + if (c === QUESTION) { + // It is valid to go to S_XML_DECL_ENDING from this state. + this.state = S_XML_DECL_ENDING; + return; + } + if (c !== EOC) { + this.state = S_XML_DECL_NAME; + this.name = String.fromCodePoint(c); + } + } + sXMLDeclName() { + const c = this.captureTo(XML_DECL_NAME_TERMINATOR); + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + if (c === QUESTION) { + this.state = S_XML_DECL_ENDING; + this.name += this.text; + this.text = ""; + this.fail("XML declaration is incomplete."); + return; + } + if (!(isS(c) || c === EQUAL)) { + return; + } + this.name += this.text; + this.text = ""; + if (!this.xmlDeclExpects.includes(this.name)) { + switch (this.name.length) { + case 0: + this.fail("did not expect any more name/value pairs."); + break; + case 1: + this.fail(`expected the name ${this.xmlDeclExpects[0]}.`); + break; + default: + this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`); + } + } + this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ; + } + sXMLDeclEq() { + const c = this.getCodeNorm(); + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + if (c === QUESTION) { + this.state = S_XML_DECL_ENDING; + this.fail("XML declaration is incomplete."); + return; + } + if (isS(c)) { + return; + } + if (c !== EQUAL) { + this.fail("value required."); + } + this.state = S_XML_DECL_VALUE_START; + } + sXMLDeclValueStart() { + const c = this.getCodeNorm(); + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + if (c === QUESTION) { + this.state = S_XML_DECL_ENDING; + this.fail("XML declaration is incomplete."); + return; + } + if (isS(c)) { + return; + } + if (!isQuote(c)) { + this.fail("value must be quoted."); + this.q = SPACE; + } + else { + this.q = c; + } + this.state = S_XML_DECL_VALUE; + } + sXMLDeclValue() { + const c = this.captureTo([this.q, QUESTION]); + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + if (c === QUESTION) { + this.state = S_XML_DECL_ENDING; + this.text = ""; + this.fail("XML declaration is incomplete."); + return; + } + if (c === EOC) { + return; + } + const value = this.text; + this.text = ""; + switch (this.name) { + case "version": { + this.xmlDeclExpects = ["encoding", "standalone"]; + const version = value; + this.xmlDecl.version = version; + // This is the test specified by XML 1.0 but it is fine for XML 1.1. + if (!/^1\.[0-9]+$/.test(version)) { + this.fail("version number must match /^1\\.[0-9]+$/."); + } + // When forceXMLVersion is set, the XML declaration is ignored. + else if (!this.opt.forceXMLVersion) { + this.setXMLVersion(version); + } + break; + } + case "encoding": + if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) { + this.fail("encoding value must match \ +/^[A-Za-z0-9][A-Za-z0-9._-]*$/."); + } + this.xmlDeclExpects = ["standalone"]; + this.xmlDecl.encoding = value; + break; + case "standalone": + if (value !== "yes" && value !== "no") { + this.fail("standalone value must match \"yes\" or \"no\"."); + } + this.xmlDeclExpects = []; + this.xmlDecl.standalone = value; + break; + default: + // We don't need to raise an error here since we've already raised one + // when checking what name was expected. + } + this.name = ""; + this.state = S_XML_DECL_SEPARATOR; + } + sXMLDeclSeparator() { + const c = this.getCodeNorm(); + // The question mark character is not valid inside any of the XML + // declaration name/value pairs. + if (c === QUESTION) { + // It is valid to go to S_XML_DECL_ENDING from this state. + this.state = S_XML_DECL_ENDING; + return; + } + if (!isS(c)) { + this.fail("whitespace required."); + this.unget(); + } + this.state = S_XML_DECL_NAME_START; + } + sXMLDeclEnding() { + var _a; + const c = this.getCodeNorm(); + if (c === GREATER) { + if (this.piTarget !== "xml") { + this.fail("processing instructions are not allowed before root."); + } + else if (this.name !== "version" && + this.xmlDeclExpects.includes("version")) { + this.fail("XML declaration must contain a version."); + } + // eslint-disable-next-line no-unused-expressions + (_a = this.xmldeclHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.xmlDecl); + this.name = ""; + this.piTarget = this.text = ""; + this.state = S_TEXT; + } + else { + // We got here because the previous character was a ?, but the question + // mark character is not valid inside any of the XML declaration + // name/value pairs. + this.fail("The character ? is disallowed anywhere in XML declarations."); + } + this.xmlDeclPossible = false; + } + sOpenTag() { + var _a; + const c = this.captureNameChars(); + if (c === EOC) { + return; + } + const tag = this.tag = { + name: this.name, + attributes: Object.create(null), + }; + this.name = ""; + if (this.xmlnsOpt) { + this.topNS = tag.ns = Object.create(null); + } + // eslint-disable-next-line no-unused-expressions + (_a = this.openTagStartHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag); + this.sawRoot = true; + if (!this.fragmentOpt && this.closedRoot) { + this.fail("documents may contain only one root."); + } + switch (c) { + case GREATER: + this.openTag(); + break; + case FORWARD_SLASH: + this.state = S_OPEN_TAG_SLASH; + break; + default: + if (!isS(c)) { + this.fail("disallowed character in tag name."); + } + this.state = S_ATTRIB; + } + } + sOpenTagSlash() { + if (this.getCode() === GREATER) { + this.openSelfClosingTag(); + } + else { + this.fail("forward-slash in opening tag not followed by >."); + this.state = S_ATTRIB; + } + } + sAttrib() { + const c = this.skipSpaces(); + if (c === EOC) { + return; + } + if (isNameStartChar(c)) { + this.unget(); + this.state = S_ATTRIB_NAME; + } + else if (c === GREATER) { + this.openTag(); + } + else if (c === FORWARD_SLASH) { + this.state = S_OPEN_TAG_SLASH; + } + else { + this.fail("disallowed character in attribute name."); + } + } + sAttribName() { + const c = this.captureNameChars(); + if (c === EQUAL) { + this.state = S_ATTRIB_VALUE; + } + else if (isS(c)) { + this.state = S_ATTRIB_NAME_SAW_WHITE; + } + else if (c === GREATER) { + this.fail("attribute without value."); + this.pushAttrib(this.name, this.name); + this.name = this.text = ""; + this.openTag(); + } + else if (c !== EOC) { + this.fail("disallowed character in attribute name."); + } + } + sAttribNameSawWhite() { + const c = this.skipSpaces(); + switch (c) { + case EOC: + return; + case EQUAL: + this.state = S_ATTRIB_VALUE; + break; + default: + this.fail("attribute without value."); + // Should we do this??? + // this.tag.attributes[this.name] = ""; + this.text = ""; + this.name = ""; + if (c === GREATER) { + this.openTag(); + } + else if (isNameStartChar(c)) { + this.unget(); + this.state = S_ATTRIB_NAME; + } + else { + this.fail("disallowed character in attribute name."); + this.state = S_ATTRIB; + } + } + } + sAttribValue() { + const c = this.getCodeNorm(); + if (isQuote(c)) { + this.q = c; + this.state = S_ATTRIB_VALUE_QUOTED; + } + else if (!isS(c)) { + this.fail("unquoted attribute value."); + this.state = S_ATTRIB_VALUE_UNQUOTED; + this.unget(); + } + } + sAttribValueQuoted() { + // We deliberately do not use captureTo here. The specialized code we use + // here is faster than using captureTo. + const { q, chunk } = this; + let { i: start } = this; + // eslint-disable-next-line no-constant-condition + while (true) { + switch (this.getCode()) { + case q: + this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI)); + this.name = this.text = ""; + this.q = null; + this.state = S_ATTRIB_VALUE_CLOSED; + return; + case AMP: + this.text += chunk.slice(start, this.prevI); + this.state = S_ENTITY; + this.entityReturnState = S_ATTRIB_VALUE_QUOTED; + return; + case NL: + case NL_LIKE: + case TAB: + this.text += `${chunk.slice(start, this.prevI)} `; + start = this.i; + break; + case LESS: + this.text += chunk.slice(start, this.prevI); + this.fail("disallowed character."); + return; + case EOC: + this.text += chunk.slice(start); + return; + default: + } + } + } + sAttribValueClosed() { + const c = this.getCodeNorm(); + if (isS(c)) { + this.state = S_ATTRIB; + } + else if (c === GREATER) { + this.openTag(); + } + else if (c === FORWARD_SLASH) { + this.state = S_OPEN_TAG_SLASH; + } + else if (isNameStartChar(c)) { + this.fail("no whitespace between attributes."); + this.unget(); + this.state = S_ATTRIB_NAME; + } + else { + this.fail("disallowed character in attribute name."); + } + } + sAttribValueUnquoted() { + // We don't do anything regarding EOL or space handling for unquoted + // attributes. We already have failed by the time we get here, and the + // contract that saxes upholds states that upon failure, it is not safe to + // rely on the data passed to event handlers (other than + // ``onerror``). Passing "bad" data is not a problem. + const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR); + switch (c) { + case AMP: + this.state = S_ENTITY; + this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED; + break; + case LESS: + this.fail("disallowed character."); + break; + case EOC: + break; + default: + if (this.text.includes("]]>")) { + this.fail("the string \"]]>\" is disallowed in char data."); + } + this.pushAttrib(this.name, this.text); + this.name = this.text = ""; + if (c === GREATER) { + this.openTag(); + } + else { + this.state = S_ATTRIB; + } + } + } + sCloseTag() { + const c = this.captureNameChars(); + if (c === GREATER) { + this.closeTag(); + } + else if (isS(c)) { + this.state = S_CLOSE_TAG_SAW_WHITE; + } + else if (c !== EOC) { + this.fail("disallowed character in closing tag."); + } + } + sCloseTagSawWhite() { + switch (this.skipSpaces()) { + case GREATER: + this.closeTag(); + break; + case EOC: + break; + default: + this.fail("disallowed character in closing tag."); + } + } + // END OF STATE ENGINE METHODS + handleTextInRoot() { + // This is essentially a specialized version of captureTo which is optimized + // for performing the ]]> check. A previous version of this code, checked + // ``this.text`` for the presence of ]]>. It simplified the code but was + // very costly when character data contained a lot of entities to be parsed. + // + // Since we are using a specialized loop, we also keep track of the presence + // of ]]> in text data. The sequence ]]> is forbidden to appear as-is. + // + let { i: start, forbiddenState } = this; + const { chunk, textHandler: handler } = this; + // eslint-disable-next-line no-labels, no-restricted-syntax + scanLoop: + // eslint-disable-next-line no-constant-condition + while (true) { + switch (this.getCode()) { + case LESS: { + this.state = S_OPEN_WAKA; + if (handler !== undefined) { + const { text } = this; + const slice = chunk.slice(start, this.prevI); + if (text.length !== 0) { + handler(text + slice); + this.text = ""; + } + else if (slice.length !== 0) { + handler(slice); + } + } + forbiddenState = FORBIDDEN_START; + // eslint-disable-next-line no-labels + break scanLoop; + } + case AMP: + this.state = S_ENTITY; + this.entityReturnState = S_TEXT; + if (handler !== undefined) { + this.text += chunk.slice(start, this.prevI); + } + forbiddenState = FORBIDDEN_START; + // eslint-disable-next-line no-labels + break scanLoop; + case CLOSE_BRACKET: + switch (forbiddenState) { + case FORBIDDEN_START: + forbiddenState = FORBIDDEN_BRACKET; + break; + case FORBIDDEN_BRACKET: + forbiddenState = FORBIDDEN_BRACKET_BRACKET; + break; + case FORBIDDEN_BRACKET_BRACKET: + break; + default: + throw new Error("impossible state"); + } + break; + case GREATER: + if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) { + this.fail("the string \"]]>\" is disallowed in char data."); + } + forbiddenState = FORBIDDEN_START; + break; + case NL_LIKE: + if (handler !== undefined) { + this.text += `${chunk.slice(start, this.prevI)}\n`; + } + start = this.i; + forbiddenState = FORBIDDEN_START; + break; + case EOC: + if (handler !== undefined) { + this.text += chunk.slice(start); + } + // eslint-disable-next-line no-labels + break scanLoop; + default: + forbiddenState = FORBIDDEN_START; + } + } + this.forbiddenState = forbiddenState; + } + handleTextOutsideRoot() { + // This is essentially a specialized version of captureTo which is optimized + // for a specialized task. We keep track of the presence of non-space + // characters in the text since these are errors when appearing outside the + // document root element. + let { i: start } = this; + const { chunk, textHandler: handler } = this; + let nonSpace = false; + // eslint-disable-next-line no-labels, no-restricted-syntax + outRootLoop: + // eslint-disable-next-line no-constant-condition + while (true) { + const code = this.getCode(); + switch (code) { + case LESS: { + this.state = S_OPEN_WAKA; + if (handler !== undefined) { + const { text } = this; + const slice = chunk.slice(start, this.prevI); + if (text.length !== 0) { + handler(text + slice); + this.text = ""; + } + else if (slice.length !== 0) { + handler(slice); + } + } + // eslint-disable-next-line no-labels + break outRootLoop; + } + case AMP: + this.state = S_ENTITY; + this.entityReturnState = S_TEXT; + if (handler !== undefined) { + this.text += chunk.slice(start, this.prevI); + } + nonSpace = true; + // eslint-disable-next-line no-labels + break outRootLoop; + case NL_LIKE: + if (handler !== undefined) { + this.text += `${chunk.slice(start, this.prevI)}\n`; + } + start = this.i; + break; + case EOC: + if (handler !== undefined) { + this.text += chunk.slice(start); + } + // eslint-disable-next-line no-labels + break outRootLoop; + default: + if (!isS(code)) { + nonSpace = true; + } + } + } + if (!nonSpace) { + return; + } + // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags + // to avoid reporting errors for every single character that is out of + // place. + if (!this.sawRoot && !this.reportedTextBeforeRoot) { + this.fail("text data outside of root node."); + this.reportedTextBeforeRoot = true; + } + if (this.closedRoot && !this.reportedTextAfterRoot) { + this.fail("text data outside of root node."); + this.reportedTextAfterRoot = true; + } + } + pushAttribNS(name, value) { + var _a; + const { prefix, local } = this.qname(name); + const attr = { name, prefix, local, value }; + this.attribList.push(attr); + // eslint-disable-next-line no-unused-expressions + (_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr); + if (prefix === "xmlns") { + const trimmed = value.trim(); + if (this.currentXMLVersion === "1.0" && trimmed === "") { + this.fail("invalid attempt to undefine prefix in XML 1.0"); + } + this.topNS[local] = trimmed; + nsPairCheck(this, local, trimmed); + } + else if (name === "xmlns") { + const trimmed = value.trim(); + this.topNS[""] = trimmed; + nsPairCheck(this, "", trimmed); + } + } + pushAttribPlain(name, value) { + var _a; + const attr = { name, value }; + this.attribList.push(attr); + // eslint-disable-next-line no-unused-expressions + (_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr); + } + /** + * End parsing. This performs final well-formedness checks and resets the + * parser to a clean state. + * + * @returns this + */ + end() { + var _a, _b; + if (!this.sawRoot) { + this.fail("document must contain a root element."); + } + const { tags } = this; + while (tags.length > 0) { + const tag = tags.pop(); + this.fail(`unclosed tag: ${tag.name}`); + } + if ((this.state !== S_BEGIN) && (this.state !== S_TEXT)) { + this.fail("unexpected end."); + } + const { text } = this; + if (text.length !== 0) { + // eslint-disable-next-line no-unused-expressions + (_a = this.textHandler) === null || _a === void 0 ? void 0 : _a.call(this, text); + this.text = ""; + } + this._closed = true; + // eslint-disable-next-line no-unused-expressions + (_b = this.endHandler) === null || _b === void 0 ? void 0 : _b.call(this); + this._init(); + return this; + } + /** + * Resolve a namespace prefix. + * + * @param prefix The prefix to resolve. + * + * @returns The namespace URI or ``undefined`` if the prefix is not defined. + */ + resolve(prefix) { + var _a, _b; + let uri = this.topNS[prefix]; + if (uri !== undefined) { + return uri; + } + const { tags } = this; + for (let index = tags.length - 1; index >= 0; index--) { + uri = tags[index].ns[prefix]; + if (uri !== undefined) { + return uri; + } + } + uri = this.ns[prefix]; + if (uri !== undefined) { + return uri; + } + return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix); + } + /** + * Parse a qname into its prefix and local name parts. + * + * @param name The name to parse + * + * @returns + */ + qname(name) { + // This is faster than using name.split(":"). + const colon = name.indexOf(":"); + if (colon === -1) { + return { prefix: "", local: name }; + } + const local = name.slice(colon + 1); + const prefix = name.slice(0, colon); + if (prefix === "" || local === "" || local.includes(":")) { + this.fail(`malformed name: ${name}.`); + } + return { prefix, local }; + } + processAttribsNS() { + var _a; + const { attribList } = this; + const tag = this.tag; + { + // add namespace info to tag + const { prefix, local } = this.qname(tag.name); + tag.prefix = prefix; + tag.local = local; + const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : ""; + if (prefix !== "") { + if (prefix === "xmlns") { + this.fail("tags may not have \"xmlns\" as prefix."); + } + if (uri === "") { + this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`); + tag.uri = prefix; + } + } + } + if (attribList.length === 0) { + return; + } + const { attributes } = tag; + const seen = new Set(); + // Note: do not apply default ns to attributes: + // http://www.w3.org/TR/REC-xml-names/#defaulting + for (const attr of attribList) { + const { name, prefix, local } = attr; + let uri; + let eqname; + if (prefix === "") { + uri = name === "xmlns" ? XMLNS_NAMESPACE : ""; + eqname = name; + } + else { + uri = this.resolve(prefix); + // if there's any attributes with an undefined namespace, + // then fail on them now. + if (uri === undefined) { + this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`); + uri = prefix; + } + eqname = `{${uri}}${local}`; + } + if (seen.has(eqname)) { + this.fail(`duplicate attribute: ${eqname}.`); + } + seen.add(eqname); + attr.uri = uri; + attributes[name] = attr; + } + this.attribList = []; + } + processAttribsPlain() { + const { attribList } = this; + // eslint-disable-next-line prefer-destructuring + const attributes = this.tag.attributes; + for (const { name, value } of attribList) { + if (attributes[name] !== undefined) { + this.fail(`duplicate attribute: ${name}.`); + } + attributes[name] = value; + } + this.attribList = []; + } + /** + * Handle a complete open tag. This parser code calls this once it has seen + * the whole tag. This method checks for well-formeness and then emits + * ``onopentag``. + */ + openTag() { + var _a; + this.processAttribs(); + const { tags } = this; + const tag = this.tag; + tag.isSelfClosing = false; + // There cannot be any pending text here due to the onopentagstart that was + // necessarily emitted before we get here. So we do not check text. + // eslint-disable-next-line no-unused-expressions + (_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag); + tags.push(tag); + this.state = S_TEXT; + this.name = ""; + } + /** + * Handle a complete self-closing tag. This parser code calls this once it has + * seen the whole tag. This method checks for well-formeness and then emits + * ``onopentag`` and ``onclosetag``. + */ + openSelfClosingTag() { + var _a, _b, _c; + this.processAttribs(); + const { tags } = this; + const tag = this.tag; + tag.isSelfClosing = true; + // There cannot be any pending text here due to the onopentagstart that was + // necessarily emitted before we get here. So we do not check text. + // eslint-disable-next-line no-unused-expressions + (_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag); + // eslint-disable-next-line no-unused-expressions + (_b = this.closeTagHandler) === null || _b === void 0 ? void 0 : _b.call(this, tag); + const top = this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null; + if (top === null) { + this.closedRoot = true; + } + this.state = S_TEXT; + this.name = ""; + } + /** + * Handle a complete close tag. This parser code calls this once it has seen + * the whole tag. This method checks for well-formeness and then emits + * ``onclosetag``. + */ + closeTag() { + const { tags, name } = this; + // Our state after this will be S_TEXT, no matter what, and we can clear + // tagName now. + this.state = S_TEXT; + this.name = ""; + if (name === "") { + this.fail("weird empty close tag."); + this.text += "</>"; + return; + } + const handler = this.closeTagHandler; + let l = tags.length; + while (l-- > 0) { + const tag = this.tag = tags.pop(); + this.topNS = tag.ns; + // eslint-disable-next-line no-unused-expressions + handler === null || handler === void 0 ? void 0 : handler(tag); + if (tag.name === name) { + break; + } + this.fail("unexpected close tag."); + } + if (l === 0) { + this.closedRoot = true; + } + else if (l < 0) { + this.fail(`unmatched closing tag: ${name}.`); + this.text += `</${name}>`; + } + } + /** + * Resolves an entity. Makes any necessary well-formedness checks. + * + * @param entity The entity to resolve. + * + * @returns The parsed entity. + */ + parseEntity(entity) { + // startsWith would be significantly slower for this test. + // eslint-disable-next-line @typescript-eslint/prefer-string-starts-ends-with + if (entity[0] !== "#") { + const defined = this.ENTITIES[entity]; + if (defined !== undefined) { + return defined; + } + this.fail(this.isName(entity) ? "undefined entity." : + "disallowed character in entity name."); + return `&${entity};`; + } + let num = NaN; + if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) { + num = parseInt(entity.slice(2), 16); + } + else if (/^#[0-9]+$/.test(entity)) { + num = parseInt(entity.slice(1), 10); + } + // The character reference is required to match the CHAR production. + if (!this.isChar(num)) { + this.fail("malformed character entity."); + return `&${entity};`; + } + return String.fromCodePoint(num); + } +} +exports.SaxesParser = SaxesParser; +//# sourceMappingURL=saxes.js.map
\ No newline at end of file |