From 953ddd82e48dd206cef5ac94456549aed13b3ad5 Mon Sep 17 00:00:00 2001 From: RaindropsSys Date: Fri, 17 Nov 2023 23:25:29 +0100 Subject: Updated 30 files and deleted 2976 files (automated) --- .../node_modules/parse5/dist/tokenizer/index.js | 2904 -------------------- 1 file changed, 2904 deletions(-) delete mode 100644 includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js (limited to 'includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js') diff --git a/includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js b/includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js deleted file mode 100644 index 217b7bc..0000000 --- a/includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js +++ /dev/null @@ -1,2904 +0,0 @@ -import { Preprocessor } from './preprocessor.js'; -import { CODE_POINTS as $, SEQUENCES as $$, REPLACEMENT_CHARACTER, isSurrogate, isUndefinedCodePoint, isControlCodePoint, } from '../common/unicode.js'; -import { TokenType, getTokenAttr, } from '../common/token.js'; -import { htmlDecodeTree, BinTrieFlags, determineBranch } from 'entities/lib/decode.js'; -import { ERR } from '../common/error-codes.js'; -import { TAG_ID, getTagID } from '../common/html.js'; -//C1 Unicode control character reference replacements -const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([ - [0x80, 8364], - [0x82, 8218], - [0x83, 402], - [0x84, 8222], - [0x85, 8230], - [0x86, 8224], - [0x87, 8225], - [0x88, 710], - [0x89, 8240], - [0x8a, 352], - [0x8b, 8249], - [0x8c, 338], - [0x8e, 381], - [0x91, 8216], - [0x92, 8217], - [0x93, 8220], - [0x94, 8221], - [0x95, 8226], - [0x96, 8211], - [0x97, 8212], - [0x98, 732], - [0x99, 8482], - [0x9a, 353], - [0x9b, 8250], - [0x9c, 339], - [0x9e, 382], - [0x9f, 376], -]); -//States -var State; -(function (State) { - State[State["DATA"] = 0] = "DATA"; - State[State["RCDATA"] = 1] = "RCDATA"; - State[State["RAWTEXT"] = 2] = "RAWTEXT"; - State[State["SCRIPT_DATA"] 
= 3] = "SCRIPT_DATA"; - State[State["PLAINTEXT"] = 4] = "PLAINTEXT"; - State[State["TAG_OPEN"] = 5] = "TAG_OPEN"; - State[State["END_TAG_OPEN"] = 6] = "END_TAG_OPEN"; - State[State["TAG_NAME"] = 7] = "TAG_NAME"; - State[State["RCDATA_LESS_THAN_SIGN"] = 8] = "RCDATA_LESS_THAN_SIGN"; - State[State["RCDATA_END_TAG_OPEN"] = 9] = "RCDATA_END_TAG_OPEN"; - State[State["RCDATA_END_TAG_NAME"] = 10] = "RCDATA_END_TAG_NAME"; - State[State["RAWTEXT_LESS_THAN_SIGN"] = 11] = "RAWTEXT_LESS_THAN_SIGN"; - State[State["RAWTEXT_END_TAG_OPEN"] = 12] = "RAWTEXT_END_TAG_OPEN"; - State[State["RAWTEXT_END_TAG_NAME"] = 13] = "RAWTEXT_END_TAG_NAME"; - State[State["SCRIPT_DATA_LESS_THAN_SIGN"] = 14] = "SCRIPT_DATA_LESS_THAN_SIGN"; - State[State["SCRIPT_DATA_END_TAG_OPEN"] = 15] = "SCRIPT_DATA_END_TAG_OPEN"; - State[State["SCRIPT_DATA_END_TAG_NAME"] = 16] = "SCRIPT_DATA_END_TAG_NAME"; - State[State["SCRIPT_DATA_ESCAPE_START"] = 17] = "SCRIPT_DATA_ESCAPE_START"; - State[State["SCRIPT_DATA_ESCAPE_START_DASH"] = 18] = "SCRIPT_DATA_ESCAPE_START_DASH"; - State[State["SCRIPT_DATA_ESCAPED"] = 19] = "SCRIPT_DATA_ESCAPED"; - State[State["SCRIPT_DATA_ESCAPED_DASH"] = 20] = "SCRIPT_DATA_ESCAPED_DASH"; - State[State["SCRIPT_DATA_ESCAPED_DASH_DASH"] = 21] = "SCRIPT_DATA_ESCAPED_DASH_DASH"; - State[State["SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN"] = 22] = "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN"; - State[State["SCRIPT_DATA_ESCAPED_END_TAG_OPEN"] = 23] = "SCRIPT_DATA_ESCAPED_END_TAG_OPEN"; - State[State["SCRIPT_DATA_ESCAPED_END_TAG_NAME"] = 24] = "SCRIPT_DATA_ESCAPED_END_TAG_NAME"; - State[State["SCRIPT_DATA_DOUBLE_ESCAPE_START"] = 25] = "SCRIPT_DATA_DOUBLE_ESCAPE_START"; - State[State["SCRIPT_DATA_DOUBLE_ESCAPED"] = 26] = "SCRIPT_DATA_DOUBLE_ESCAPED"; - State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH"] = 27] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH"; - State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH"] = 28] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH"; - State[State["SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN"] = 29] = 
"SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN"; - State[State["SCRIPT_DATA_DOUBLE_ESCAPE_END"] = 30] = "SCRIPT_DATA_DOUBLE_ESCAPE_END"; - State[State["BEFORE_ATTRIBUTE_NAME"] = 31] = "BEFORE_ATTRIBUTE_NAME"; - State[State["ATTRIBUTE_NAME"] = 32] = "ATTRIBUTE_NAME"; - State[State["AFTER_ATTRIBUTE_NAME"] = 33] = "AFTER_ATTRIBUTE_NAME"; - State[State["BEFORE_ATTRIBUTE_VALUE"] = 34] = "BEFORE_ATTRIBUTE_VALUE"; - State[State["ATTRIBUTE_VALUE_DOUBLE_QUOTED"] = 35] = "ATTRIBUTE_VALUE_DOUBLE_QUOTED"; - State[State["ATTRIBUTE_VALUE_SINGLE_QUOTED"] = 36] = "ATTRIBUTE_VALUE_SINGLE_QUOTED"; - State[State["ATTRIBUTE_VALUE_UNQUOTED"] = 37] = "ATTRIBUTE_VALUE_UNQUOTED"; - State[State["AFTER_ATTRIBUTE_VALUE_QUOTED"] = 38] = "AFTER_ATTRIBUTE_VALUE_QUOTED"; - State[State["SELF_CLOSING_START_TAG"] = 39] = "SELF_CLOSING_START_TAG"; - State[State["BOGUS_COMMENT"] = 40] = "BOGUS_COMMENT"; - State[State["MARKUP_DECLARATION_OPEN"] = 41] = "MARKUP_DECLARATION_OPEN"; - State[State["COMMENT_START"] = 42] = "COMMENT_START"; - State[State["COMMENT_START_DASH"] = 43] = "COMMENT_START_DASH"; - State[State["COMMENT"] = 44] = "COMMENT"; - State[State["COMMENT_LESS_THAN_SIGN"] = 45] = "COMMENT_LESS_THAN_SIGN"; - State[State["COMMENT_LESS_THAN_SIGN_BANG"] = 46] = "COMMENT_LESS_THAN_SIGN_BANG"; - State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH"] = 47] = "COMMENT_LESS_THAN_SIGN_BANG_DASH"; - State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH"] = 48] = "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH"; - State[State["COMMENT_END_DASH"] = 49] = "COMMENT_END_DASH"; - State[State["COMMENT_END"] = 50] = "COMMENT_END"; - State[State["COMMENT_END_BANG"] = 51] = "COMMENT_END_BANG"; - State[State["DOCTYPE"] = 52] = "DOCTYPE"; - State[State["BEFORE_DOCTYPE_NAME"] = 53] = "BEFORE_DOCTYPE_NAME"; - State[State["DOCTYPE_NAME"] = 54] = "DOCTYPE_NAME"; - State[State["AFTER_DOCTYPE_NAME"] = 55] = "AFTER_DOCTYPE_NAME"; - State[State["AFTER_DOCTYPE_PUBLIC_KEYWORD"] = 56] = "AFTER_DOCTYPE_PUBLIC_KEYWORD"; - 
State[State["BEFORE_DOCTYPE_PUBLIC_IDENTIFIER"] = 57] = "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER"; - State[State["DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED"] = 58] = "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED"; - State[State["DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED"] = 59] = "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED"; - State[State["AFTER_DOCTYPE_PUBLIC_IDENTIFIER"] = 60] = "AFTER_DOCTYPE_PUBLIC_IDENTIFIER"; - State[State["BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS"] = 61] = "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS"; - State[State["AFTER_DOCTYPE_SYSTEM_KEYWORD"] = 62] = "AFTER_DOCTYPE_SYSTEM_KEYWORD"; - State[State["BEFORE_DOCTYPE_SYSTEM_IDENTIFIER"] = 63] = "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER"; - State[State["DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED"] = 64] = "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED"; - State[State["DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED"] = 65] = "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED"; - State[State["AFTER_DOCTYPE_SYSTEM_IDENTIFIER"] = 66] = "AFTER_DOCTYPE_SYSTEM_IDENTIFIER"; - State[State["BOGUS_DOCTYPE"] = 67] = "BOGUS_DOCTYPE"; - State[State["CDATA_SECTION"] = 68] = "CDATA_SECTION"; - State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET"; - State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END"; - State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE"; - State[State["NAMED_CHARACTER_REFERENCE"] = 72] = "NAMED_CHARACTER_REFERENCE"; - State[State["AMBIGUOUS_AMPERSAND"] = 73] = "AMBIGUOUS_AMPERSAND"; - State[State["NUMERIC_CHARACTER_REFERENCE"] = 74] = "NUMERIC_CHARACTER_REFERENCE"; - State[State["HEXADEMICAL_CHARACTER_REFERENCE_START"] = 75] = "HEXADEMICAL_CHARACTER_REFERENCE_START"; - State[State["HEXADEMICAL_CHARACTER_REFERENCE"] = 76] = "HEXADEMICAL_CHARACTER_REFERENCE"; - State[State["DECIMAL_CHARACTER_REFERENCE"] = 77] = "DECIMAL_CHARACTER_REFERENCE"; - State[State["NUMERIC_CHARACTER_REFERENCE_END"] = 78] = "NUMERIC_CHARACTER_REFERENCE_END"; -})(State || (State = {})); -//Tokenizer initial states for different 
modes -export const TokenizerMode = { - DATA: State.DATA, - RCDATA: State.RCDATA, - RAWTEXT: State.RAWTEXT, - SCRIPT_DATA: State.SCRIPT_DATA, - PLAINTEXT: State.PLAINTEXT, - CDATA_SECTION: State.CDATA_SECTION, -}; -//Utils -//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline -//this functions if they will be situated in another module due to context switch. -//Always perform inlining check before modifying this functions ('node --trace-inlining'). -function isAsciiDigit(cp) { - return cp >= $.DIGIT_0 && cp <= $.DIGIT_9; -} -function isAsciiUpper(cp) { - return cp >= $.LATIN_CAPITAL_A && cp <= $.LATIN_CAPITAL_Z; -} -function isAsciiLower(cp) { - return cp >= $.LATIN_SMALL_A && cp <= $.LATIN_SMALL_Z; -} -function isAsciiLetter(cp) { - return isAsciiLower(cp) || isAsciiUpper(cp); -} -function isAsciiAlphaNumeric(cp) { - return isAsciiLetter(cp) || isAsciiDigit(cp); -} -function isAsciiUpperHexDigit(cp) { - return cp >= $.LATIN_CAPITAL_A && cp <= $.LATIN_CAPITAL_F; -} -function isAsciiLowerHexDigit(cp) { - return cp >= $.LATIN_SMALL_A && cp <= $.LATIN_SMALL_F; -} -function isAsciiHexDigit(cp) { - return isAsciiDigit(cp) || isAsciiUpperHexDigit(cp) || isAsciiLowerHexDigit(cp); -} -function toAsciiLower(cp) { - return cp + 32; -} -function isWhitespace(cp) { - return cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED; -} -function isEntityInAttributeInvalidEnd(nextCp) { - return nextCp === $.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp); -} -function isScriptDataDoubleEscapeSequenceEnd(cp) { - return isWhitespace(cp) || cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN; -} -//Tokenizer -export class Tokenizer { - constructor(options, handler) { - this.options = options; - this.handler = handler; - this.paused = false; - /** Ensures that the parsing loop isn't run multiple times at once. 
*/ - this.inLoop = false; - /** - * Indicates that the current adjusted node exists, is not an element in the HTML namespace, - * and that it is not an integration point for either MathML or HTML. - * - * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction} - */ - this.inForeignNode = false; - this.lastStartTagName = ''; - this.active = false; - this.state = State.DATA; - this.returnState = State.DATA; - this.charRefCode = -1; - this.consumedAfterSnapshot = -1; - this.currentCharacterToken = null; - this.currentToken = null; - this.currentAttr = { name: '', value: '' }; - this.preprocessor = new Preprocessor(handler); - this.currentLocation = this.getCurrentLocation(-1); - } - //Errors - _err(code) { - var _a, _b; - (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code)); - } - // NOTE: `offset` may never run across line boundaries. - getCurrentLocation(offset) { - if (!this.options.sourceCodeLocationInfo) { - return null; - } - return { - startLine: this.preprocessor.line, - startCol: this.preprocessor.col - offset, - startOffset: this.preprocessor.offset - offset, - endLine: -1, - endCol: -1, - endOffset: -1, - }; - } - _runParsingLoop() { - if (this.inLoop) - return; - this.inLoop = true; - while (this.active && !this.paused) { - this.consumedAfterSnapshot = 0; - const cp = this._consume(); - if (!this._ensureHibernation()) { - this._callState(cp); - } - } - this.inLoop = false; - } - //API - pause() { - this.paused = true; - } - resume(writeCallback) { - if (!this.paused) { - throw new Error('Parser was already resumed'); - } - this.paused = false; - // Necessary for synchronous resume. - if (this.inLoop) - return; - this._runParsingLoop(); - if (!this.paused) { - writeCallback === null || writeCallback === void 0 ? 
void 0 : writeCallback(); - } - } - write(chunk, isLastChunk, writeCallback) { - this.active = true; - this.preprocessor.write(chunk, isLastChunk); - this._runParsingLoop(); - if (!this.paused) { - writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); - } - } - insertHtmlAtCurrentPos(chunk) { - this.active = true; - this.preprocessor.insertHtmlAtCurrentPos(chunk); - this._runParsingLoop(); - } - //Hibernation - _ensureHibernation() { - if (this.preprocessor.endOfChunkHit) { - this._unconsume(this.consumedAfterSnapshot); - this.active = false; - return true; - } - return false; - } - //Consumption - _consume() { - this.consumedAfterSnapshot++; - return this.preprocessor.advance(); - } - _unconsume(count) { - this.consumedAfterSnapshot -= count; - this.preprocessor.retreat(count); - } - _reconsumeInState(state, cp) { - this.state = state; - this._callState(cp); - } - _advanceBy(count) { - this.consumedAfterSnapshot += count; - for (let i = 0; i < count; i++) { - this.preprocessor.advance(); - } - } - _consumeSequenceIfMatch(pattern, caseSensitive) { - if (this.preprocessor.startsWith(pattern, caseSensitive)) { - // We will already have consumed one character before calling this method. 
- this._advanceBy(pattern.length - 1); - return true; - } - return false; - } - //Token creation - _createStartTagToken() { - this.currentToken = { - type: TokenType.START_TAG, - tagName: '', - tagID: TAG_ID.UNKNOWN, - selfClosing: false, - ackSelfClosing: false, - attrs: [], - location: this.getCurrentLocation(1), - }; - } - _createEndTagToken() { - this.currentToken = { - type: TokenType.END_TAG, - tagName: '', - tagID: TAG_ID.UNKNOWN, - selfClosing: false, - ackSelfClosing: false, - attrs: [], - location: this.getCurrentLocation(2), - }; - } - _createCommentToken(offset) { - this.currentToken = { - type: TokenType.COMMENT, - data: '', - location: this.getCurrentLocation(offset), - }; - } - _createDoctypeToken(initialName) { - this.currentToken = { - type: TokenType.DOCTYPE, - name: initialName, - forceQuirks: false, - publicId: null, - systemId: null, - location: this.currentLocation, - }; - } - _createCharacterToken(type, chars) { - this.currentCharacterToken = { - type, - chars, - location: this.currentLocation, - }; - } - //Tag attributes - _createAttr(attrNameFirstCh) { - this.currentAttr = { - name: attrNameFirstCh, - value: '', - }; - this.currentLocation = this.getCurrentLocation(0); - } - _leaveAttrName() { - var _a; - var _b; - const token = this.currentToken; - if (getTokenAttr(token, this.currentAttr.name) === null) { - token.attrs.push(this.currentAttr); - if (token.location && this.currentLocation) { - const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? 
_a : (_b.attrs = Object.create(null))); - attrLocations[this.currentAttr.name] = this.currentLocation; - // Set end location - this._leaveAttrValue(); - } - } - else { - this._err(ERR.duplicateAttribute); - } - } - _leaveAttrValue() { - if (this.currentLocation) { - this.currentLocation.endLine = this.preprocessor.line; - this.currentLocation.endCol = this.preprocessor.col; - this.currentLocation.endOffset = this.preprocessor.offset; - } - } - //Token emission - prepareToken(ct) { - this._emitCurrentCharacterToken(ct.location); - this.currentToken = null; - if (ct.location) { - ct.location.endLine = this.preprocessor.line; - ct.location.endCol = this.preprocessor.col + 1; - ct.location.endOffset = this.preprocessor.offset + 1; - } - this.currentLocation = this.getCurrentLocation(-1); - } - emitCurrentTagToken() { - const ct = this.currentToken; - this.prepareToken(ct); - ct.tagID = getTagID(ct.tagName); - if (ct.type === TokenType.START_TAG) { - this.lastStartTagName = ct.tagName; - this.handler.onStartTag(ct); - } - else { - if (ct.attrs.length > 0) { - this._err(ERR.endTagWithAttributes); - } - if (ct.selfClosing) { - this._err(ERR.endTagWithTrailingSolidus); - } - this.handler.onEndTag(ct); - } - this.preprocessor.dropParsedChunk(); - } - emitCurrentComment(ct) { - this.prepareToken(ct); - this.handler.onComment(ct); - this.preprocessor.dropParsedChunk(); - } - emitCurrentDoctype(ct) { - this.prepareToken(ct); - this.handler.onDoctype(ct); - this.preprocessor.dropParsedChunk(); - } - _emitCurrentCharacterToken(nextLocation) { - if (this.currentCharacterToken) { - //NOTE: if we have a pending character token, make it's end location equal to the - //current token's start location. 
- if (nextLocation && this.currentCharacterToken.location) { - this.currentCharacterToken.location.endLine = nextLocation.startLine; - this.currentCharacterToken.location.endCol = nextLocation.startCol; - this.currentCharacterToken.location.endOffset = nextLocation.startOffset; - } - switch (this.currentCharacterToken.type) { - case TokenType.CHARACTER: { - this.handler.onCharacter(this.currentCharacterToken); - break; - } - case TokenType.NULL_CHARACTER: { - this.handler.onNullCharacter(this.currentCharacterToken); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - this.handler.onWhitespaceCharacter(this.currentCharacterToken); - break; - } - } - this.currentCharacterToken = null; - } - } - _emitEOFToken() { - const location = this.getCurrentLocation(0); - if (location) { - location.endLine = location.startLine; - location.endCol = location.startCol; - location.endOffset = location.startOffset; - } - this._emitCurrentCharacterToken(location); - this.handler.onEof({ type: TokenType.EOF, location }); - this.active = false; - } - //Characters emission - //OPTIMIZATION: specification uses only one type of character tokens (one token per character). - //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters. - //If we have a sequence of characters that belong to the same group, the parser can process it - //as a single solid character token. - //So, there are 3 types of character tokens in parse5: - //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000') - //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f') - //3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 
'abcdef1234@@#$%^') - _appendCharToCurrentCharacterToken(type, ch) { - if (this.currentCharacterToken) { - if (this.currentCharacterToken.type !== type) { - this.currentLocation = this.getCurrentLocation(0); - this._emitCurrentCharacterToken(this.currentLocation); - this.preprocessor.dropParsedChunk(); - } - else { - this.currentCharacterToken.chars += ch; - return; - } - } - this._createCharacterToken(type, ch); - } - _emitCodePoint(cp) { - const type = isWhitespace(cp) - ? TokenType.WHITESPACE_CHARACTER - : cp === $.NULL - ? TokenType.NULL_CHARACTER - : TokenType.CHARACTER; - this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp)); - } - //NOTE: used when we emit characters explicitly. - //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks. - _emitChars(ch) { - this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch); - } - // Character reference helpers - _matchNamedCharacterReference(cp) { - let result = null; - let excess = 0; - let withoutSemicolon = false; - for (let i = 0, current = htmlDecodeTree[0]; i >= 0; cp = this._consume()) { - i = determineBranch(htmlDecodeTree, current, i + 1, cp); - if (i < 0) - break; - excess += 1; - current = htmlDecodeTree[i]; - const masked = current & BinTrieFlags.VALUE_LENGTH; - // If the branch is a value, store it and continue - if (masked) { - // The mask is the number of bytes of the value, including the current byte. - const valueLength = (masked >> 14) - 1; - // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error. - // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state - if (cp !== $.SEMICOLON && - this._isCharacterReferenceInAttribute() && - isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) { - //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after - //emitting an ampersand. 
This is fine, as alphanumeric characters won't be parsed differently in attributes. - result = [$.AMPERSAND]; - // Skip over the value. - i += valueLength; - } - else { - // If this is a surrogate pair, consume the next two bytes. - result = - valueLength === 0 - ? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH] - : valueLength === 1 - ? [htmlDecodeTree[++i]] - : [htmlDecodeTree[++i], htmlDecodeTree[++i]]; - excess = 0; - withoutSemicolon = cp !== $.SEMICOLON; - } - if (valueLength === 0) { - // If the value is zero-length, we're done. - this._consume(); - break; - } - } - } - this._unconsume(excess); - if (withoutSemicolon && !this.preprocessor.endOfChunkHit) { - this._err(ERR.missingSemicolonAfterCharacterReference); - } - // We want to emit the error above on the code point after the entity. - // We always consume one code point too many in the loop, and we wait to - // unconsume it until after the error is emitted. - this._unconsume(1); - return result; - } - _isCharacterReferenceInAttribute() { - return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED || - this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED || - this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED); - } - _flushCodePointConsumedAsCharacterReference(cp) { - if (this._isCharacterReferenceInAttribute()) { - this.currentAttr.value += String.fromCodePoint(cp); - } - else { - this._emitCodePoint(cp); - } - } - // Calling states this way turns out to be much faster than any other approach. 
- _callState(cp) { - switch (this.state) { - case State.DATA: { - this._stateData(cp); - break; - } - case State.RCDATA: { - this._stateRcdata(cp); - break; - } - case State.RAWTEXT: { - this._stateRawtext(cp); - break; - } - case State.SCRIPT_DATA: { - this._stateScriptData(cp); - break; - } - case State.PLAINTEXT: { - this._statePlaintext(cp); - break; - } - case State.TAG_OPEN: { - this._stateTagOpen(cp); - break; - } - case State.END_TAG_OPEN: { - this._stateEndTagOpen(cp); - break; - } - case State.TAG_NAME: { - this._stateTagName(cp); - break; - } - case State.RCDATA_LESS_THAN_SIGN: { - this._stateRcdataLessThanSign(cp); - break; - } - case State.RCDATA_END_TAG_OPEN: { - this._stateRcdataEndTagOpen(cp); - break; - } - case State.RCDATA_END_TAG_NAME: { - this._stateRcdataEndTagName(cp); - break; - } - case State.RAWTEXT_LESS_THAN_SIGN: { - this._stateRawtextLessThanSign(cp); - break; - } - case State.RAWTEXT_END_TAG_OPEN: { - this._stateRawtextEndTagOpen(cp); - break; - } - case State.RAWTEXT_END_TAG_NAME: { - this._stateRawtextEndTagName(cp); - break; - } - case State.SCRIPT_DATA_LESS_THAN_SIGN: { - this._stateScriptDataLessThanSign(cp); - break; - } - case State.SCRIPT_DATA_END_TAG_OPEN: { - this._stateScriptDataEndTagOpen(cp); - break; - } - case State.SCRIPT_DATA_END_TAG_NAME: { - this._stateScriptDataEndTagName(cp); - break; - } - case State.SCRIPT_DATA_ESCAPE_START: { - this._stateScriptDataEscapeStart(cp); - break; - } - case State.SCRIPT_DATA_ESCAPE_START_DASH: { - this._stateScriptDataEscapeStartDash(cp); - break; - } - case State.SCRIPT_DATA_ESCAPED: { - this._stateScriptDataEscaped(cp); - break; - } - case State.SCRIPT_DATA_ESCAPED_DASH: { - this._stateScriptDataEscapedDash(cp); - break; - } - case State.SCRIPT_DATA_ESCAPED_DASH_DASH: { - this._stateScriptDataEscapedDashDash(cp); - break; - } - case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { - this._stateScriptDataEscapedLessThanSign(cp); - break; - } - case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: 
{ - this._stateScriptDataEscapedEndTagOpen(cp); - break; - } - case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: { - this._stateScriptDataEscapedEndTagName(cp); - break; - } - case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: { - this._stateScriptDataDoubleEscapeStart(cp); - break; - } - case State.SCRIPT_DATA_DOUBLE_ESCAPED: { - this._stateScriptDataDoubleEscaped(cp); - break; - } - case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { - this._stateScriptDataDoubleEscapedDash(cp); - break; - } - case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { - this._stateScriptDataDoubleEscapedDashDash(cp); - break; - } - case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { - this._stateScriptDataDoubleEscapedLessThanSign(cp); - break; - } - case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: { - this._stateScriptDataDoubleEscapeEnd(cp); - break; - } - case State.BEFORE_ATTRIBUTE_NAME: { - this._stateBeforeAttributeName(cp); - break; - } - case State.ATTRIBUTE_NAME: { - this._stateAttributeName(cp); - break; - } - case State.AFTER_ATTRIBUTE_NAME: { - this._stateAfterAttributeName(cp); - break; - } - case State.BEFORE_ATTRIBUTE_VALUE: { - this._stateBeforeAttributeValue(cp); - break; - } - case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: { - this._stateAttributeValueDoubleQuoted(cp); - break; - } - case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: { - this._stateAttributeValueSingleQuoted(cp); - break; - } - case State.ATTRIBUTE_VALUE_UNQUOTED: { - this._stateAttributeValueUnquoted(cp); - break; - } - case State.AFTER_ATTRIBUTE_VALUE_QUOTED: { - this._stateAfterAttributeValueQuoted(cp); - break; - } - case State.SELF_CLOSING_START_TAG: { - this._stateSelfClosingStartTag(cp); - break; - } - case State.BOGUS_COMMENT: { - this._stateBogusComment(cp); - break; - } - case State.MARKUP_DECLARATION_OPEN: { - this._stateMarkupDeclarationOpen(cp); - break; - } - case State.COMMENT_START: { - this._stateCommentStart(cp); - break; - } - case State.COMMENT_START_DASH: { - this._stateCommentStartDash(cp); - break; - } - case 
State.COMMENT: { - this._stateComment(cp); - break; - } - case State.COMMENT_LESS_THAN_SIGN: { - this._stateCommentLessThanSign(cp); - break; - } - case State.COMMENT_LESS_THAN_SIGN_BANG: { - this._stateCommentLessThanSignBang(cp); - break; - } - case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: { - this._stateCommentLessThanSignBangDash(cp); - break; - } - case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: { - this._stateCommentLessThanSignBangDashDash(cp); - break; - } - case State.COMMENT_END_DASH: { - this._stateCommentEndDash(cp); - break; - } - case State.COMMENT_END: { - this._stateCommentEnd(cp); - break; - } - case State.COMMENT_END_BANG: { - this._stateCommentEndBang(cp); - break; - } - case State.DOCTYPE: { - this._stateDoctype(cp); - break; - } - case State.BEFORE_DOCTYPE_NAME: { - this._stateBeforeDoctypeName(cp); - break; - } - case State.DOCTYPE_NAME: { - this._stateDoctypeName(cp); - break; - } - case State.AFTER_DOCTYPE_NAME: { - this._stateAfterDoctypeName(cp); - break; - } - case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: { - this._stateAfterDoctypePublicKeyword(cp); - break; - } - case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { - this._stateBeforeDoctypePublicIdentifier(cp); - break; - } - case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { - this._stateDoctypePublicIdentifierDoubleQuoted(cp); - break; - } - case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { - this._stateDoctypePublicIdentifierSingleQuoted(cp); - break; - } - case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { - this._stateAfterDoctypePublicIdentifier(cp); - break; - } - case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { - this._stateBetweenDoctypePublicAndSystemIdentifiers(cp); - break; - } - case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: { - this._stateAfterDoctypeSystemKeyword(cp); - break; - } - case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { - this._stateBeforeDoctypeSystemIdentifier(cp); - break; - } - case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { - 
this._stateDoctypeSystemIdentifierDoubleQuoted(cp); - break; - } - case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { - this._stateDoctypeSystemIdentifierSingleQuoted(cp); - break; - } - case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { - this._stateAfterDoctypeSystemIdentifier(cp); - break; - } - case State.BOGUS_DOCTYPE: { - this._stateBogusDoctype(cp); - break; - } - case State.CDATA_SECTION: { - this._stateCdataSection(cp); - break; - } - case State.CDATA_SECTION_BRACKET: { - this._stateCdataSectionBracket(cp); - break; - } - case State.CDATA_SECTION_END: { - this._stateCdataSectionEnd(cp); - break; - } - case State.CHARACTER_REFERENCE: { - this._stateCharacterReference(cp); - break; - } - case State.NAMED_CHARACTER_REFERENCE: { - this._stateNamedCharacterReference(cp); - break; - } - case State.AMBIGUOUS_AMPERSAND: { - this._stateAmbiguousAmpersand(cp); - break; - } - case State.NUMERIC_CHARACTER_REFERENCE: { - this._stateNumericCharacterReference(cp); - break; - } - case State.HEXADEMICAL_CHARACTER_REFERENCE_START: { - this._stateHexademicalCharacterReferenceStart(cp); - break; - } - case State.HEXADEMICAL_CHARACTER_REFERENCE: { - this._stateHexademicalCharacterReference(cp); - break; - } - case State.DECIMAL_CHARACTER_REFERENCE: { - this._stateDecimalCharacterReference(cp); - break; - } - case State.NUMERIC_CHARACTER_REFERENCE_END: { - this._stateNumericCharacterReferenceEnd(cp); - break; - } - default: { - throw new Error('Unknown state'); - } - } - } - // State machine - // Data state - //------------------------------------------------------------------ - _stateData(cp) { - switch (cp) { - case $.LESS_THAN_SIGN: { - this.state = State.TAG_OPEN; - break; - } - case $.AMPERSAND: { - this.returnState = State.DATA; - this.state = State.CHARACTER_REFERENCE; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this._emitCodePoint(cp); - break; - } - case $.EOF: { - this._emitEOFToken(); - break; - } - default: { - this._emitCodePoint(cp); - 
} - } - } - // RCDATA state - //------------------------------------------------------------------ - _stateRcdata(cp) { - switch (cp) { - case $.AMPERSAND: { - this.returnState = State.RCDATA; - this.state = State.CHARACTER_REFERENCE; - break; - } - case $.LESS_THAN_SIGN: { - this.state = State.RCDATA_LESS_THAN_SIGN; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this._emitChars(REPLACEMENT_CHARACTER); - break; - } - case $.EOF: { - this._emitEOFToken(); - break; - } - default: { - this._emitCodePoint(cp); - } - } - } - // RAWTEXT state - //------------------------------------------------------------------ - _stateRawtext(cp) { - switch (cp) { - case $.LESS_THAN_SIGN: { - this.state = State.RAWTEXT_LESS_THAN_SIGN; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this._emitChars(REPLACEMENT_CHARACTER); - break; - } - case $.EOF: { - this._emitEOFToken(); - break; - } - default: { - this._emitCodePoint(cp); - } - } - } - // Script data state - //------------------------------------------------------------------ - _stateScriptData(cp) { - switch (cp) { - case $.LESS_THAN_SIGN: { - this.state = State.SCRIPT_DATA_LESS_THAN_SIGN; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this._emitChars(REPLACEMENT_CHARACTER); - break; - } - case $.EOF: { - this._emitEOFToken(); - break; - } - default: { - this._emitCodePoint(cp); - } - } - } - // PLAINTEXT state - //------------------------------------------------------------------ - _statePlaintext(cp) { - switch (cp) { - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this._emitChars(REPLACEMENT_CHARACTER); - break; - } - case $.EOF: { - this._emitEOFToken(); - break; - } - default: { - this._emitCodePoint(cp); - } - } - } - // Tag open state - //------------------------------------------------------------------ - _stateTagOpen(cp) { - if (isAsciiLetter(cp)) { - this._createStartTagToken(); - this.state = State.TAG_NAME; - 
this._stateTagName(cp); - } - else - switch (cp) { - case $.EXCLAMATION_MARK: { - this.state = State.MARKUP_DECLARATION_OPEN; - break; - } - case $.SOLIDUS: { - this.state = State.END_TAG_OPEN; - break; - } - case $.QUESTION_MARK: { - this._err(ERR.unexpectedQuestionMarkInsteadOfTagName); - this._createCommentToken(1); - this.state = State.BOGUS_COMMENT; - this._stateBogusComment(cp); - break; - } - case $.EOF: { - this._err(ERR.eofBeforeTagName); - this._emitChars('<'); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.invalidFirstCharacterOfTagName); - this._emitChars('<'); - this.state = State.DATA; - this._stateData(cp); - } - } - } - // End tag open state - //------------------------------------------------------------------ - _stateEndTagOpen(cp) { - if (isAsciiLetter(cp)) { - this._createEndTagToken(); - this.state = State.TAG_NAME; - this._stateTagName(cp); - } - else - switch (cp) { - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingEndTagName); - this.state = State.DATA; - break; - } - case $.EOF: { - this._err(ERR.eofBeforeTagName); - this._emitChars(''); - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this.state = State.SCRIPT_DATA_ESCAPED; - this._emitChars(REPLACEMENT_CHARACTER); - break; - } - case $.EOF: { - this._err(ERR.eofInScriptHtmlCommentLikeText); - this._emitEOFToken(); - break; - } - default: { - this.state = State.SCRIPT_DATA_ESCAPED; - this._emitCodePoint(cp); - } - } - } - // Script data escaped less-than sign state - //------------------------------------------------------------------ - _stateScriptDataEscapedLessThanSign(cp) { - if (cp === $.SOLIDUS) { - this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN; - } - else if (isAsciiLetter(cp)) { - this._emitChars('<'); - this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START; - this._stateScriptDataDoubleEscapeStart(cp); - } - else { - this._emitChars('<'); - this.state = State.SCRIPT_DATA_ESCAPED; - this._stateScriptDataEscaped(cp); - } - } - // 
Script data escaped end tag open state - //------------------------------------------------------------------ - _stateScriptDataEscapedEndTagOpen(cp) { - if (isAsciiLetter(cp)) { - this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME; - this._stateScriptDataEscapedEndTagName(cp); - } - else { - this._emitChars(''); - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; - this._emitChars(REPLACEMENT_CHARACTER); - break; - } - case $.EOF: { - this._err(ERR.eofInScriptHtmlCommentLikeText); - this._emitEOFToken(); - break; - } - default: { - this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; - this._emitCodePoint(cp); - } - } - } - // Script data double escaped less-than sign state - //------------------------------------------------------------------ - _stateScriptDataDoubleEscapedLessThanSign(cp) { - if (cp === $.SOLIDUS) { - this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END; - this._emitChars('/'); - } - else { - this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; - this._stateScriptDataDoubleEscaped(cp); - } - } - // Script data double escape end state - //------------------------------------------------------------------ - _stateScriptDataDoubleEscapeEnd(cp) { - if (this.preprocessor.startsWith($$.SCRIPT, false) && - isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) { - this._emitCodePoint(cp); - for (let i = 0; i < $$.SCRIPT.length; i++) { - this._emitCodePoint(this._consume()); - } - this.state = State.SCRIPT_DATA_ESCAPED; - } - else if (!this._ensureHibernation()) { - this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; - this._stateScriptDataDoubleEscaped(cp); - } - } - // Before attribute name state - //------------------------------------------------------------------ - _stateBeforeAttributeName(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.SOLIDUS: - case $.GREATER_THAN_SIGN: - case 
$.EOF: { - this.state = State.AFTER_ATTRIBUTE_NAME; - this._stateAfterAttributeName(cp); - break; - } - case $.EQUALS_SIGN: { - this._err(ERR.unexpectedEqualsSignBeforeAttributeName); - this._createAttr('='); - this.state = State.ATTRIBUTE_NAME; - break; - } - default: { - this._createAttr(''); - this.state = State.ATTRIBUTE_NAME; - this._stateAttributeName(cp); - } - } - } - // Attribute name state - //------------------------------------------------------------------ - _stateAttributeName(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: - case $.SOLIDUS: - case $.GREATER_THAN_SIGN: - case $.EOF: { - this._leaveAttrName(); - this.state = State.AFTER_ATTRIBUTE_NAME; - this._stateAfterAttributeName(cp); - break; - } - case $.EQUALS_SIGN: { - this._leaveAttrName(); - this.state = State.BEFORE_ATTRIBUTE_VALUE; - break; - } - case $.QUOTATION_MARK: - case $.APOSTROPHE: - case $.LESS_THAN_SIGN: { - this._err(ERR.unexpectedCharacterInAttributeName); - this.currentAttr.name += String.fromCodePoint(cp); - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this.currentAttr.name += REPLACEMENT_CHARACTER; - break; - } - default: { - this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? 
toAsciiLower(cp) : cp); - } - } - } - // After attribute name state - //------------------------------------------------------------------ - _stateAfterAttributeName(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.SOLIDUS: { - this.state = State.SELF_CLOSING_START_TAG; - break; - } - case $.EQUALS_SIGN: { - this.state = State.BEFORE_ATTRIBUTE_VALUE; - break; - } - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - this.emitCurrentTagToken(); - break; - } - case $.EOF: { - this._err(ERR.eofInTag); - this._emitEOFToken(); - break; - } - default: { - this._createAttr(''); - this.state = State.ATTRIBUTE_NAME; - this._stateAttributeName(cp); - } - } - } - // Before attribute value state - //------------------------------------------------------------------ - _stateBeforeAttributeValue(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.QUOTATION_MARK: { - this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingAttributeValue); - this.state = State.DATA; - this.emitCurrentTagToken(); - break; - } - default: { - this.state = State.ATTRIBUTE_VALUE_UNQUOTED; - this._stateAttributeValueUnquoted(cp); - } - } - } - // Attribute value (double-quoted) state - //------------------------------------------------------------------ - _stateAttributeValueDoubleQuoted(cp) { - switch (cp) { - case $.QUOTATION_MARK: { - this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; - break; - } - case $.AMPERSAND: { - this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED; - this.state = State.CHARACTER_REFERENCE; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this.currentAttr.value += REPLACEMENT_CHARACTER; - break; - } - case $.EOF: 
{ - this._err(ERR.eofInTag); - this._emitEOFToken(); - break; - } - default: { - this.currentAttr.value += String.fromCodePoint(cp); - } - } - } - // Attribute value (single-quoted) state - //------------------------------------------------------------------ - _stateAttributeValueSingleQuoted(cp) { - switch (cp) { - case $.APOSTROPHE: { - this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; - break; - } - case $.AMPERSAND: { - this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED; - this.state = State.CHARACTER_REFERENCE; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this.currentAttr.value += REPLACEMENT_CHARACTER; - break; - } - case $.EOF: { - this._err(ERR.eofInTag); - this._emitEOFToken(); - break; - } - default: { - this.currentAttr.value += String.fromCodePoint(cp); - } - } - } - // Attribute value (unquoted) state - //------------------------------------------------------------------ - _stateAttributeValueUnquoted(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this._leaveAttrValue(); - this.state = State.BEFORE_ATTRIBUTE_NAME; - break; - } - case $.AMPERSAND: { - this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED; - this.state = State.CHARACTER_REFERENCE; - break; - } - case $.GREATER_THAN_SIGN: { - this._leaveAttrValue(); - this.state = State.DATA; - this.emitCurrentTagToken(); - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this.currentAttr.value += REPLACEMENT_CHARACTER; - break; - } - case $.QUOTATION_MARK: - case $.APOSTROPHE: - case $.LESS_THAN_SIGN: - case $.EQUALS_SIGN: - case $.GRAVE_ACCENT: { - this._err(ERR.unexpectedCharacterInUnquotedAttributeValue); - this.currentAttr.value += String.fromCodePoint(cp); - break; - } - case $.EOF: { - this._err(ERR.eofInTag); - this._emitEOFToken(); - break; - } - default: { - this.currentAttr.value += String.fromCodePoint(cp); - } - } - } - // After attribute value (quoted) state - 
//------------------------------------------------------------------ - _stateAfterAttributeValueQuoted(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this._leaveAttrValue(); - this.state = State.BEFORE_ATTRIBUTE_NAME; - break; - } - case $.SOLIDUS: { - this._leaveAttrValue(); - this.state = State.SELF_CLOSING_START_TAG; - break; - } - case $.GREATER_THAN_SIGN: { - this._leaveAttrValue(); - this.state = State.DATA; - this.emitCurrentTagToken(); - break; - } - case $.EOF: { - this._err(ERR.eofInTag); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingWhitespaceBetweenAttributes); - this.state = State.BEFORE_ATTRIBUTE_NAME; - this._stateBeforeAttributeName(cp); - } - } - } - // Self-closing start tag state - //------------------------------------------------------------------ - _stateSelfClosingStartTag(cp) { - switch (cp) { - case $.GREATER_THAN_SIGN: { - const token = this.currentToken; - token.selfClosing = true; - this.state = State.DATA; - this.emitCurrentTagToken(); - break; - } - case $.EOF: { - this._err(ERR.eofInTag); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.unexpectedSolidusInTag); - this.state = State.BEFORE_ATTRIBUTE_NAME; - this._stateBeforeAttributeName(cp); - } - } - } - // Bogus comment state - //------------------------------------------------------------------ - _stateBogusComment(cp) { - const token = this.currentToken; - switch (cp) { - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - this.emitCurrentComment(token); - break; - } - case $.EOF: { - this.emitCurrentComment(token); - this._emitEOFToken(); - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.data += REPLACEMENT_CHARACTER; - break; - } - default: { - token.data += String.fromCodePoint(cp); - } - } - } - // Markup declaration open state - //------------------------------------------------------------------ - _stateMarkupDeclarationOpen(cp) { - if 
(this._consumeSequenceIfMatch($$.DASH_DASH, true)) { - this._createCommentToken($$.DASH_DASH.length + 1); - this.state = State.COMMENT_START; - } - else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) { - // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here. - this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1); - this.state = State.DOCTYPE; - } - else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) { - if (this.inForeignNode) { - this.state = State.CDATA_SECTION; - } - else { - this._err(ERR.cdataInHtmlContent); - this._createCommentToken($$.CDATA_START.length + 1); - this.currentToken.data = '[CDATA['; - this.state = State.BOGUS_COMMENT; - } - } - //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup - //results are no longer valid and we will need to start over. - else if (!this._ensureHibernation()) { - this._err(ERR.incorrectlyOpenedComment); - this._createCommentToken(2); - this.state = State.BOGUS_COMMENT; - this._stateBogusComment(cp); - } - } - // Comment start state - //------------------------------------------------------------------ - _stateCommentStart(cp) { - switch (cp) { - case $.HYPHEN_MINUS: { - this.state = State.COMMENT_START_DASH; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.abruptClosingOfEmptyComment); - this.state = State.DATA; - const token = this.currentToken; - this.emitCurrentComment(token); - break; - } - default: { - this.state = State.COMMENT; - this._stateComment(cp); - } - } - } - // Comment start dash state - //------------------------------------------------------------------ - _stateCommentStartDash(cp) { - const token = this.currentToken; - switch (cp) { - case $.HYPHEN_MINUS: { - this.state = State.COMMENT_END; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.abruptClosingOfEmptyComment); - this.state = State.DATA; - this.emitCurrentComment(token); - break; - } - case $.EOF: { - 
this._err(ERR.eofInComment); - this.emitCurrentComment(token); - this._emitEOFToken(); - break; - } - default: { - token.data += '-'; - this.state = State.COMMENT; - this._stateComment(cp); - } - } - } - // Comment state - //------------------------------------------------------------------ - _stateComment(cp) { - const token = this.currentToken; - switch (cp) { - case $.HYPHEN_MINUS: { - this.state = State.COMMENT_END_DASH; - break; - } - case $.LESS_THAN_SIGN: { - token.data += '<'; - this.state = State.COMMENT_LESS_THAN_SIGN; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.data += REPLACEMENT_CHARACTER; - break; - } - case $.EOF: { - this._err(ERR.eofInComment); - this.emitCurrentComment(token); - this._emitEOFToken(); - break; - } - default: { - token.data += String.fromCodePoint(cp); - } - } - } - // Comment less-than sign state - //------------------------------------------------------------------ - _stateCommentLessThanSign(cp) { - const token = this.currentToken; - switch (cp) { - case $.EXCLAMATION_MARK: { - token.data += '!'; - this.state = State.COMMENT_LESS_THAN_SIGN_BANG; - break; - } - case $.LESS_THAN_SIGN: { - token.data += '<'; - break; - } - default: { - this.state = State.COMMENT; - this._stateComment(cp); - } - } - } - // Comment less-than sign bang state - //------------------------------------------------------------------ - _stateCommentLessThanSignBang(cp) { - if (cp === $.HYPHEN_MINUS) { - this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH; - } - else { - this.state = State.COMMENT; - this._stateComment(cp); - } - } - // Comment less-than sign bang dash state - //------------------------------------------------------------------ - _stateCommentLessThanSignBangDash(cp) { - if (cp === $.HYPHEN_MINUS) { - this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH; - } - else { - this.state = State.COMMENT_END_DASH; - this._stateCommentEndDash(cp); - } - } - // Comment less-than sign bang dash dash state - 
//------------------------------------------------------------------ - _stateCommentLessThanSignBangDashDash(cp) { - if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) { - this._err(ERR.nestedComment); - } - this.state = State.COMMENT_END; - this._stateCommentEnd(cp); - } - // Comment end dash state - //------------------------------------------------------------------ - _stateCommentEndDash(cp) { - const token = this.currentToken; - switch (cp) { - case $.HYPHEN_MINUS: { - this.state = State.COMMENT_END; - break; - } - case $.EOF: { - this._err(ERR.eofInComment); - this.emitCurrentComment(token); - this._emitEOFToken(); - break; - } - default: { - token.data += '-'; - this.state = State.COMMENT; - this._stateComment(cp); - } - } - } - // Comment end state - //------------------------------------------------------------------ - _stateCommentEnd(cp) { - const token = this.currentToken; - switch (cp) { - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - this.emitCurrentComment(token); - break; - } - case $.EXCLAMATION_MARK: { - this.state = State.COMMENT_END_BANG; - break; - } - case $.HYPHEN_MINUS: { - token.data += '-'; - break; - } - case $.EOF: { - this._err(ERR.eofInComment); - this.emitCurrentComment(token); - this._emitEOFToken(); - break; - } - default: { - token.data += '--'; - this.state = State.COMMENT; - this._stateComment(cp); - } - } - } - // Comment end bang state - //------------------------------------------------------------------ - _stateCommentEndBang(cp) { - const token = this.currentToken; - switch (cp) { - case $.HYPHEN_MINUS: { - token.data += '--!'; - this.state = State.COMMENT_END_DASH; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.incorrectlyClosedComment); - this.state = State.DATA; - this.emitCurrentComment(token); - break; - } - case $.EOF: { - this._err(ERR.eofInComment); - this.emitCurrentComment(token); - this._emitEOFToken(); - break; - } - default: { - token.data += '--!'; - this.state = State.COMMENT; - 
this._stateComment(cp); - } - } - } - // DOCTYPE state - //------------------------------------------------------------------ - _stateDoctype(cp) { - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this.state = State.BEFORE_DOCTYPE_NAME; - break; - } - case $.GREATER_THAN_SIGN: { - this.state = State.BEFORE_DOCTYPE_NAME; - this._stateBeforeDoctypeName(cp); - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - this._createDoctypeToken(null); - const token = this.currentToken; - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingWhitespaceBeforeDoctypeName); - this.state = State.BEFORE_DOCTYPE_NAME; - this._stateBeforeDoctypeName(cp); - } - } - } - // Before DOCTYPE name state - //------------------------------------------------------------------ - _stateBeforeDoctypeName(cp) { - if (isAsciiUpper(cp)) { - this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp))); - this.state = State.DOCTYPE_NAME; - } - else - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - this._createDoctypeToken(REPLACEMENT_CHARACTER); - this.state = State.DOCTYPE_NAME; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingDoctypeName); - this._createDoctypeToken(null); - const token = this.currentToken; - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - this._createDoctypeToken(null); - const token = this.currentToken; - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._createDoctypeToken(String.fromCodePoint(cp)); - this.state = State.DOCTYPE_NAME; - } - } - } - // DOCTYPE name state - 
//------------------------------------------------------------------ - _stateDoctypeName(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this.state = State.AFTER_DOCTYPE_NAME; - break; - } - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.name += REPLACEMENT_CHARACTER; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); - } - } - } - // After DOCTYPE name state - //------------------------------------------------------------------ - _stateAfterDoctypeName(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - if (this._consumeSequenceIfMatch($$.PUBLIC, false)) { - this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD; - } - else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) { - this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD; - } - //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup - //results are no longer valid and we will need to start over. 
- else if (!this._ensureHibernation()) { - this._err(ERR.invalidCharacterSequenceAfterDoctypeName); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - } - // After DOCTYPE public keyword state - //------------------------------------------------------------------ - _stateAfterDoctypePublicKeyword(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; - break; - } - case $.QUOTATION_MARK: { - this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); - token.publicId = ''; - this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); - token.publicId = ''; - this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingDoctypePublicIdentifier); - token.forceQuirks = true; - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // Before DOCTYPE public identifier state - //------------------------------------------------------------------ - _stateBeforeDoctypePublicIdentifier(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.QUOTATION_MARK: { - token.publicId = ''; - this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - token.publicId = ''; - this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; - 
break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingDoctypePublicIdentifier); - token.forceQuirks = true; - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // DOCTYPE public identifier (double-quoted) state - //------------------------------------------------------------------ - _stateDoctypePublicIdentifierDoubleQuoted(cp) { - const token = this.currentToken; - switch (cp) { - case $.QUOTATION_MARK: { - this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.publicId += REPLACEMENT_CHARACTER; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.abruptDoctypePublicIdentifier); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - token.publicId += String.fromCodePoint(cp); - } - } - } - // DOCTYPE public identifier (single-quoted) state - //------------------------------------------------------------------ - _stateDoctypePublicIdentifierSingleQuoted(cp) { - const token = this.currentToken; - switch (cp) { - case $.APOSTROPHE: { - this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.publicId += REPLACEMENT_CHARACTER; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.abruptDoctypePublicIdentifier); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.EOF: { - 
this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - token.publicId += String.fromCodePoint(cp); - } - } - } - // After DOCTYPE public identifier state - //------------------------------------------------------------------ - _stateAfterDoctypePublicIdentifier(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; - break; - } - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.QUOTATION_MARK: { - this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); - token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); - token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // Between DOCTYPE public and system identifiers state - //------------------------------------------------------------------ - _stateBetweenDoctypePublicAndSystemIdentifiers(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.GREATER_THAN_SIGN: { - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.QUOTATION_MARK: { - token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - 
token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // After DOCTYPE system keyword state - //------------------------------------------------------------------ - _stateAfterDoctypeSystemKeyword(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; - break; - } - case $.QUOTATION_MARK: { - this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); - token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); - token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingDoctypeSystemIdentifier); - token.forceQuirks = true; - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // Before DOCTYPE system identifier state - //------------------------------------------------------------------ - _stateBeforeDoctypeSystemIdentifier(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.QUOTATION_MARK: { - 
token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; - break; - } - case $.APOSTROPHE: { - token.systemId = ''; - this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.missingDoctypeSystemIdentifier); - token.forceQuirks = true; - this.state = State.DATA; - this.emitCurrentDoctype(token); - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); - token.forceQuirks = true; - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // DOCTYPE system identifier (double-quoted) state - //------------------------------------------------------------------ - _stateDoctypeSystemIdentifierDoubleQuoted(cp) { - const token = this.currentToken; - switch (cp) { - case $.QUOTATION_MARK: { - this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.systemId += REPLACEMENT_CHARACTER; - break; - } - case $.GREATER_THAN_SIGN: { - this._err(ERR.abruptDoctypeSystemIdentifier); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - token.systemId += String.fromCodePoint(cp); - } - } - } - // DOCTYPE system identifier (single-quoted) state - //------------------------------------------------------------------ - _stateDoctypeSystemIdentifierSingleQuoted(cp) { - const token = this.currentToken; - switch (cp) { - case $.APOSTROPHE: { - this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - token.systemId += REPLACEMENT_CHARACTER; - break; - } - case 
$.GREATER_THAN_SIGN: { - this._err(ERR.abruptDoctypeSystemIdentifier); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - token.systemId += String.fromCodePoint(cp); - } - } - } - // After DOCTYPE system identifier state - //------------------------------------------------------------------ - _stateAfterDoctypeSystemIdentifier(cp) { - const token = this.currentToken; - switch (cp) { - case $.SPACE: - case $.LINE_FEED: - case $.TABULATION: - case $.FORM_FEED: { - // Ignore whitespace - break; - } - case $.GREATER_THAN_SIGN: { - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.EOF: { - this._err(ERR.eofInDoctype); - token.forceQuirks = true; - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: { - this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier); - this.state = State.BOGUS_DOCTYPE; - this._stateBogusDoctype(cp); - } - } - } - // Bogus DOCTYPE state - //------------------------------------------------------------------ - _stateBogusDoctype(cp) { - const token = this.currentToken; - switch (cp) { - case $.GREATER_THAN_SIGN: { - this.emitCurrentDoctype(token); - this.state = State.DATA; - break; - } - case $.NULL: { - this._err(ERR.unexpectedNullCharacter); - break; - } - case $.EOF: { - this.emitCurrentDoctype(token); - this._emitEOFToken(); - break; - } - default: - // Do nothing - } - } - // CDATA section state - //------------------------------------------------------------------ - _stateCdataSection(cp) { - switch (cp) { - case $.RIGHT_SQUARE_BRACKET: { - this.state = State.CDATA_SECTION_BRACKET; - break; - } - case $.EOF: { - this._err(ERR.eofInCdata); - this._emitEOFToken(); - break; - } - default: { - this._emitCodePoint(cp); - } - } - } - // CDATA section bracket state - 
//------------------------------------------------------------------ - // A second RIGHT_SQUARE_BRACKET moves to the CDATA section end state; otherwise the pending ']' is emitted and cp is reprocessed by the CDATA section state. - _stateCdataSectionBracket(cp) { - if (cp === $.RIGHT_SQUARE_BRACKET) { - this.state = State.CDATA_SECTION_END; - } - else { - this._emitChars(']'); - this.state = State.CDATA_SECTION; - this._stateCdataSection(cp); - } - } - // CDATA section end state - //------------------------------------------------------------------ - // GREATER_THAN_SIGN switches to the DATA state; a further RIGHT_SQUARE_BRACKET emits one ']' and stays in this state; anything else emits ']]' and reprocesses cp in the CDATA section state. - _stateCdataSectionEnd(cp) { - switch (cp) { - case $.GREATER_THAN_SIGN: { - this.state = State.DATA; - break; - } - case $.RIGHT_SQUARE_BRACKET: { - this._emitChars(']'); - break; - } - default: { - this._emitChars(']]'); - this.state = State.CDATA_SECTION; - this._stateCdataSection(cp); - } - } - } - // Character reference state - //------------------------------------------------------------------ - // NUMBER_SIGN selects the numeric reference state and a code point accepted by isAsciiAlphaNumeric is handed to the named reference state; otherwise the ampersand is flushed and cp is reconsumed in the return state. - _stateCharacterReference(cp) { - if (cp === $.NUMBER_SIGN) { - this.state = State.NUMERIC_CHARACTER_REFERENCE; - } - else if (isAsciiAlphaNumeric(cp)) { - this.state = State.NAMED_CHARACTER_REFERENCE; - this._stateNamedCharacterReference(cp); - } - else { - this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); - this._reconsumeInState(this.returnState, cp); - } - } - // Named character reference state - //------------------------------------------------------------------ - // On a match, flushes every code point of the result and switches to the return state; without one, flushes the ampersand and moves to the ambiguous ampersand state. - _stateNamedCharacterReference(cp) { - const matchResult = this._matchNamedCharacterReference(cp); - //NOTE: Matching can be interrupted by hibernation. In that case, match - //results are no longer valid and we will need to start over. - if (this._ensureHibernation()) { - // Stay in the state, try again. 
- } - else if (matchResult) { - for (let i = 0; i < matchResult.length; i++) { - this._flushCodePointConsumedAsCharacterReference(matchResult[i]); - } - this.state = this.returnState; - } - else { - this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); - this.state = State.AMBIGUOUS_AMPERSAND; - } - } - // Ambiguous ampersand state - //------------------------------------------------------------------ - // Flushes code points accepted by isAsciiAlphaNumeric; any other code point is reconsumed in the return state, with unknownNamedCharacterReference reported first when it is a SEMICOLON. - _stateAmbiguousAmpersand(cp) { - if (isAsciiAlphaNumeric(cp)) { - this._flushCodePointConsumedAsCharacterReference(cp); - } - else { - if (cp === $.SEMICOLON) { - this._err(ERR.unknownNamedCharacterReference); - } - this._reconsumeInState(this.returnState, cp); - } - } - // Numeric character reference state - //------------------------------------------------------------------ - // Resets charRefCode, then dispatches on cp: LATIN_SMALL_X or LATIN_CAPITAL_X selects the hexademical start state, a code point accepted by isAsciiDigit is handed to the decimal state, and anything else is reported, the ampersand and number sign are flushed, and cp is reconsumed in the return state. - _stateNumericCharacterReference(cp) { - this.charRefCode = 0; - if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) { - this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START; - } - // Inlined decimal character reference start state - else if (isAsciiDigit(cp)) { - this.state = State.DECIMAL_CHARACTER_REFERENCE; - this._stateDecimalCharacterReference(cp); - } - else { - this._err(ERR.absenceOfDigitsInNumericCharacterReference); - this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); - this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN); - this._reconsumeInState(this.returnState, cp); - } - } - // Hexademical character reference start state - //------------------------------------------------------------------ - // A code point accepted by isAsciiHexDigit is handed to the hexademical state; otherwise the error is reported, the ampersand and number sign are flushed, and _unconsume(2) is called before switching to the return state. - _stateHexademicalCharacterReferenceStart(cp) { - if (isAsciiHexDigit(cp)) { - this.state = State.HEXADEMICAL_CHARACTER_REFERENCE; - this._stateHexademicalCharacterReference(cp); - } - else { - this._err(ERR.absenceOfDigitsInNumericCharacterReference); - this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); - this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN); - this._unconsume(2); - this.state = this.returnState; - } - } - // 
Hexademical character reference state - //------------------------------------------------------------------ - // Accumulates base-16 digits into charRefCode; SEMICOLON moves to the numeric end state, and any other code point reports a missing semicolon and is passed to the end state directly. - // The subtracted offsets turn a code point into its digit value: 0x37 is 'A' minus 10, 0x57 is 'a' minus 10, and 0x30 is '0'. - _stateHexademicalCharacterReference(cp) { - if (isAsciiUpperHexDigit(cp)) { - this.charRefCode = this.charRefCode * 16 + cp - 0x37; - } - else if (isAsciiLowerHexDigit(cp)) { - this.charRefCode = this.charRefCode * 16 + cp - 0x57; - } - else if (isAsciiDigit(cp)) { - this.charRefCode = this.charRefCode * 16 + cp - 0x30; - } - else if (cp === $.SEMICOLON) { - this.state = State.NUMERIC_CHARACTER_REFERENCE_END; - } - else { - this._err(ERR.missingSemicolonAfterCharacterReference); - this.state = State.NUMERIC_CHARACTER_REFERENCE_END; - this._stateNumericCharacterReferenceEnd(cp); - } - } - // Decimal character reference state - //------------------------------------------------------------------ - // Accumulates base-10 digits into charRefCode; SEMICOLON moves to the numeric end state, and any other code point reports a missing semicolon and is passed to the end state directly. - _stateDecimalCharacterReference(cp) { - if (isAsciiDigit(cp)) { - this.charRefCode = this.charRefCode * 10 + cp - 0x30; - } - else if (cp === $.SEMICOLON) { - this.state = State.NUMERIC_CHARACTER_REFERENCE_END; - } - else { - this._err(ERR.missingSemicolonAfterCharacterReference); - this.state = State.NUMERIC_CHARACTER_REFERENCE_END; - this._stateNumericCharacterReferenceEnd(cp); - } - } - // Numeric character reference end state - //------------------------------------------------------------------ - // Validates charRefCode: a value equal to $.NULL, a value above 1114111 (0x10FFFF) or a surrogate is reported and replaced with $.REPLACEMENT_CHARACTER; a value accepted by isUndefinedCodePoint is only reported; a control code point or CARRIAGE_RETURN is reported and mapped through C1_CONTROLS_REFERENCE_REPLACEMENTS when it has an entry there. The resulting code point is flushed and cp is reconsumed in the return state. - _stateNumericCharacterReferenceEnd(cp) { - if (this.charRefCode === $.NULL) { - this._err(ERR.nullCharacterReference); - this.charRefCode = $.REPLACEMENT_CHARACTER; - } - else if (this.charRefCode > 1114111) { - this._err(ERR.characterReferenceOutsideUnicodeRange); - this.charRefCode = $.REPLACEMENT_CHARACTER; - } - else if (isSurrogate(this.charRefCode)) { - this._err(ERR.surrogateCharacterReference); - this.charRefCode = $.REPLACEMENT_CHARACTER; - } - else if (isUndefinedCodePoint(this.charRefCode)) { - this._err(ERR.noncharacterCharacterReference); - } - else if (isControlCodePoint(this.charRefCode) || this.charRefCode === $.CARRIAGE_RETURN) { - 
this._err(ERR.controlCharacterReference); - const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode); - if (replacement !== undefined) { - this.charRefCode = replacement; - } - } - this._flushCodePointConsumedAsCharacterReference(this.charRefCode); - this._reconsumeInState(this.returnState, cp); - } -} -//# sourceMappingURL=index.js.map \ No newline at end of file -- cgit