diff options
Diffstat (limited to 'includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js')
-rw-r--r-- | includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js | 2904 |
1 files changed, 2904 insertions, 0 deletions
// Restored from: diff --git a/includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js b/includes/external/addressbook/node_modules/parse5/dist/tokenizer/index.js
// (new file mode 100644, index 0000000..217b7bc, --- /dev/null, @@ -0,0 +1,2904 @@)
import { Preprocessor } from './preprocessor.js';
import { CODE_POINTS as $, SEQUENCES as $$, REPLACEMENT_CHARACTER, isSurrogate, isUndefinedCodePoint, isControlCodePoint, } from '../common/unicode.js';
import { TokenType, getTokenAttr, } from '../common/token.js';
import { htmlDecodeTree, BinTrieFlags, determineBranch } from 'entities/lib/decode.js';
import { ERR } from '../common/error-codes.js';
import { TAG_ID, getTagID } from '../common/html.js';
//C1 Unicode control character reference replacements
// Maps a code point in the 0x80-0x9F range to the code point that replaces it
// (for example 0x80 -> 8364, i.e. U+20AC). 0x81, 0x8d, 0x8f, 0x90 and 0x9d have no entry.
// NOTE(review): presumably consulted when a numeric character reference is resolved;
// that code is outside this part of the file - confirm.
const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
    [0x80, 8364],
    [0x82, 8218],
    [0x83, 402],
    [0x84, 8222],
    [0x85, 8230],
    [0x86, 8224],
    [0x87, 8225],
    [0x88, 710],
    [0x89, 8240],
    [0x8a, 352],
    [0x8b, 8249],
    [0x8c, 338],
    [0x8e, 381],
    [0x91, 8216],
    [0x92, 8217],
    [0x93, 8220],
    [0x94, 8221],
    [0x95, 8226],
    [0x96, 8211],
    [0x97, 8212],
    [0x98, 732],
    [0x99, 8482],
    [0x9a, 353],
    [0x9b, 8250],
    [0x9c, 339],
    [0x9e, 382],
    [0x9f, 376],
]);
//States
// Two-way enum: State.NAME yields the numeric id and State[id] yields the name.
// The numeric ids are used as `case` labels by the tokenizer's state dispatch.
var State;
(function (State) {
    State[State["DATA"] = 0] = "DATA";
    State[State["RCDATA"] = 1] = "RCDATA";
    State[State["RAWTEXT"] = 2] = "RAWTEXT";
    State[State["SCRIPT_DATA"] = 3] = "SCRIPT_DATA";
    State[State["PLAINTEXT"] = 4] = "PLAINTEXT";
    State[State["TAG_OPEN"] = 5] = "TAG_OPEN";
    State[State["END_TAG_OPEN"] = 6] = "END_TAG_OPEN";
    State[State["TAG_NAME"] = 7] = "TAG_NAME";
    State[State["RCDATA_LESS_THAN_SIGN"] = 8] = "RCDATA_LESS_THAN_SIGN";
    State[State["RCDATA_END_TAG_OPEN"] = 9] = "RCDATA_END_TAG_OPEN";
    State[State["RCDATA_END_TAG_NAME"] = 10] = "RCDATA_END_TAG_NAME";
    State[State["RAWTEXT_LESS_THAN_SIGN"] = 11] = "RAWTEXT_LESS_THAN_SIGN";
    State[State["RAWTEXT_END_TAG_OPEN"] = 12] = "RAWTEXT_END_TAG_OPEN";
    State[State["RAWTEXT_END_TAG_NAME"] = 13] = "RAWTEXT_END_TAG_NAME";
    State[State["SCRIPT_DATA_LESS_THAN_SIGN"] = 14] = "SCRIPT_DATA_LESS_THAN_SIGN";
    State[State["SCRIPT_DATA_END_TAG_OPEN"] = 15] = "SCRIPT_DATA_END_TAG_OPEN";
    State[State["SCRIPT_DATA_END_TAG_NAME"] = 16] = "SCRIPT_DATA_END_TAG_NAME";
    State[State["SCRIPT_DATA_ESCAPE_START"] = 17] = "SCRIPT_DATA_ESCAPE_START";
    State[State["SCRIPT_DATA_ESCAPE_START_DASH"] = 18] = "SCRIPT_DATA_ESCAPE_START_DASH";
    State[State["SCRIPT_DATA_ESCAPED"] = 19] = "SCRIPT_DATA_ESCAPED";
    State[State["SCRIPT_DATA_ESCAPED_DASH"] = 20] = "SCRIPT_DATA_ESCAPED_DASH";
    State[State["SCRIPT_DATA_ESCAPED_DASH_DASH"] = 21] = "SCRIPT_DATA_ESCAPED_DASH_DASH";
    State[State["SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN"] = 22] = "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN";
    State[State["SCRIPT_DATA_ESCAPED_END_TAG_OPEN"] = 23] = "SCRIPT_DATA_ESCAPED_END_TAG_OPEN";
    State[State["SCRIPT_DATA_ESCAPED_END_TAG_NAME"] = 24] = "SCRIPT_DATA_ESCAPED_END_TAG_NAME";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPE_START"] = 25] = "SCRIPT_DATA_DOUBLE_ESCAPE_START";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED"] = 26] = "SCRIPT_DATA_DOUBLE_ESCAPED";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH"] = 27] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH"] = 28] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN"] = 29] = "SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN";
    State[State["SCRIPT_DATA_DOUBLE_ESCAPE_END"] = 30] = "SCRIPT_DATA_DOUBLE_ESCAPE_END";
    State[State["BEFORE_ATTRIBUTE_NAME"] = 31] = "BEFORE_ATTRIBUTE_NAME";
    State[State["ATTRIBUTE_NAME"] = 32] = "ATTRIBUTE_NAME";
    State[State["AFTER_ATTRIBUTE_NAME"] = 33] = "AFTER_ATTRIBUTE_NAME";
    State[State["BEFORE_ATTRIBUTE_VALUE"] = 34] = "BEFORE_ATTRIBUTE_VALUE";
    State[State["ATTRIBUTE_VALUE_DOUBLE_QUOTED"] = 35] = "ATTRIBUTE_VALUE_DOUBLE_QUOTED";
    State[State["ATTRIBUTE_VALUE_SINGLE_QUOTED"] = 36] = "ATTRIBUTE_VALUE_SINGLE_QUOTED";
    State[State["ATTRIBUTE_VALUE_UNQUOTED"] = 37] = "ATTRIBUTE_VALUE_UNQUOTED";
    State[State["AFTER_ATTRIBUTE_VALUE_QUOTED"] = 38] = "AFTER_ATTRIBUTE_VALUE_QUOTED";
    State[State["SELF_CLOSING_START_TAG"] = 39] = "SELF_CLOSING_START_TAG";
    State[State["BOGUS_COMMENT"] = 40] = "BOGUS_COMMENT";
    State[State["MARKUP_DECLARATION_OPEN"] = 41] = "MARKUP_DECLARATION_OPEN";
    State[State["COMMENT_START"] = 42] = "COMMENT_START";
    State[State["COMMENT_START_DASH"] = 43] = "COMMENT_START_DASH";
    State[State["COMMENT"] = 44] = "COMMENT";
    State[State["COMMENT_LESS_THAN_SIGN"] = 45] = "COMMENT_LESS_THAN_SIGN";
    State[State["COMMENT_LESS_THAN_SIGN_BANG"] = 46] = "COMMENT_LESS_THAN_SIGN_BANG";
    State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH"] = 47] = "COMMENT_LESS_THAN_SIGN_BANG_DASH";
    State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH"] = 48] = "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH";
    State[State["COMMENT_END_DASH"] = 49] = "COMMENT_END_DASH";
    State[State["COMMENT_END"] = 50] = "COMMENT_END";
    State[State["COMMENT_END_BANG"] = 51] = "COMMENT_END_BANG";
    State[State["DOCTYPE"] = 52] = "DOCTYPE";
    State[State["BEFORE_DOCTYPE_NAME"] = 53] = "BEFORE_DOCTYPE_NAME";
    State[State["DOCTYPE_NAME"] = 54] = "DOCTYPE_NAME";
    State[State["AFTER_DOCTYPE_NAME"] = 55] = "AFTER_DOCTYPE_NAME";
    State[State["AFTER_DOCTYPE_PUBLIC_KEYWORD"] = 56] = "AFTER_DOCTYPE_PUBLIC_KEYWORD";
    State[State["BEFORE_DOCTYPE_PUBLIC_IDENTIFIER"] = 57] = "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER";
    State[State["DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED"] = 58] = "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED";
    State[State["DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED"] = 59] = "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED";
    State[State["AFTER_DOCTYPE_PUBLIC_IDENTIFIER"] = 60] = "AFTER_DOCTYPE_PUBLIC_IDENTIFIER";
    State[State["BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS"] = 61] = "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS";
    State[State["AFTER_DOCTYPE_SYSTEM_KEYWORD"] = 62] = "AFTER_DOCTYPE_SYSTEM_KEYWORD";
    State[State["BEFORE_DOCTYPE_SYSTEM_IDENTIFIER"] = 63] = "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER";
    State[State["DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED"] = 64] = "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED";
    State[State["DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED"] = 65] = "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED";
    State[State["AFTER_DOCTYPE_SYSTEM_IDENTIFIER"] = 66] = "AFTER_DOCTYPE_SYSTEM_IDENTIFIER";
    State[State["BOGUS_DOCTYPE"] = 67] = "BOGUS_DOCTYPE";
    State[State["CDATA_SECTION"] = 68] = "CDATA_SECTION";
    State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET";
    State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END";
    State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE";
    State[State["NAMED_CHARACTER_REFERENCE"] = 72] = "NAMED_CHARACTER_REFERENCE";
    State[State["AMBIGUOUS_AMPERSAND"] = 73] = "AMBIGUOUS_AMPERSAND";
    State[State["NUMERIC_CHARACTER_REFERENCE"] = 74] = "NUMERIC_CHARACTER_REFERENCE";
    State[State["HEXADEMICAL_CHARACTER_REFERENCE_START"] = 75] = "HEXADEMICAL_CHARACTER_REFERENCE_START";
    State[State["HEXADEMICAL_CHARACTER_REFERENCE"] = 76] = "HEXADEMICAL_CHARACTER_REFERENCE";
    State[State["DECIMAL_CHARACTER_REFERENCE"] = 77] = "DECIMAL_CHARACTER_REFERENCE";
    State[State["NUMERIC_CHARACTER_REFERENCE_END"] = 78] = "NUMERIC_CHARACTER_REFERENCE_END";
})(State || (State = {}));
//Tokenizer initial states for different modes
// The only State ids that are exported; each value is the matching State id.
export const TokenizerMode = {
    DATA: State.DATA,
    RCDATA: State.RCDATA,
    RAWTEXT: State.RAWTEXT,
    SCRIPT_DATA: State.SCRIPT_DATA,
    PLAINTEXT: State.PLAINTEXT,
    CDATA_SECTION: State.CDATA_SECTION,
};
//Utils
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are situated in another module due to context switch.
+//Always perform inlining check before modifying this functions ('node --trace-inlining'). +function isAsciiDigit(cp) { + return cp >= $.DIGIT_0 && cp <= $.DIGIT_9; +} +function isAsciiUpper(cp) { + return cp >= $.LATIN_CAPITAL_A && cp <= $.LATIN_CAPITAL_Z; +} +function isAsciiLower(cp) { + return cp >= $.LATIN_SMALL_A && cp <= $.LATIN_SMALL_Z; +} +function isAsciiLetter(cp) { + return isAsciiLower(cp) || isAsciiUpper(cp); +} +function isAsciiAlphaNumeric(cp) { + return isAsciiLetter(cp) || isAsciiDigit(cp); +} +function isAsciiUpperHexDigit(cp) { + return cp >= $.LATIN_CAPITAL_A && cp <= $.LATIN_CAPITAL_F; +} +function isAsciiLowerHexDigit(cp) { + return cp >= $.LATIN_SMALL_A && cp <= $.LATIN_SMALL_F; +} +function isAsciiHexDigit(cp) { + return isAsciiDigit(cp) || isAsciiUpperHexDigit(cp) || isAsciiLowerHexDigit(cp); +} +function toAsciiLower(cp) { + return cp + 32; +} +function isWhitespace(cp) { + return cp === $.SPACE || cp === $.LINE_FEED || cp === $.TABULATION || cp === $.FORM_FEED; +} +function isEntityInAttributeInvalidEnd(nextCp) { + return nextCp === $.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp); +} +function isScriptDataDoubleEscapeSequenceEnd(cp) { + return isWhitespace(cp) || cp === $.SOLIDUS || cp === $.GREATER_THAN_SIGN; +} +//Tokenizer +export class Tokenizer { + constructor(options, handler) { + this.options = options; + this.handler = handler; + this.paused = false; + /** Ensures that the parsing loop isn't run multiple times at once. */ + this.inLoop = false; + /** + * Indicates that the current adjusted node exists, is not an element in the HTML namespace, + * and that it is not an integration point for either MathML or HTML. 
+ * + * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction} + */ + this.inForeignNode = false; + this.lastStartTagName = ''; + this.active = false; + this.state = State.DATA; + this.returnState = State.DATA; + this.charRefCode = -1; + this.consumedAfterSnapshot = -1; + this.currentCharacterToken = null; + this.currentToken = null; + this.currentAttr = { name: '', value: '' }; + this.preprocessor = new Preprocessor(handler); + this.currentLocation = this.getCurrentLocation(-1); + } + //Errors + _err(code) { + var _a, _b; + (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code)); + } + // NOTE: `offset` may never run across line boundaries. + getCurrentLocation(offset) { + if (!this.options.sourceCodeLocationInfo) { + return null; + } + return { + startLine: this.preprocessor.line, + startCol: this.preprocessor.col - offset, + startOffset: this.preprocessor.offset - offset, + endLine: -1, + endCol: -1, + endOffset: -1, + }; + } + _runParsingLoop() { + if (this.inLoop) + return; + this.inLoop = true; + while (this.active && !this.paused) { + this.consumedAfterSnapshot = 0; + const cp = this._consume(); + if (!this._ensureHibernation()) { + this._callState(cp); + } + } + this.inLoop = false; + } + //API + pause() { + this.paused = true; + } + resume(writeCallback) { + if (!this.paused) { + throw new Error('Parser was already resumed'); + } + this.paused = false; + // Necessary for synchronous resume. + if (this.inLoop) + return; + this._runParsingLoop(); + if (!this.paused) { + writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); + } + } + write(chunk, isLastChunk, writeCallback) { + this.active = true; + this.preprocessor.write(chunk, isLastChunk); + this._runParsingLoop(); + if (!this.paused) { + writeCallback === null || writeCallback === void 0 ? 
void 0 : writeCallback(); + } + } + insertHtmlAtCurrentPos(chunk) { + this.active = true; + this.preprocessor.insertHtmlAtCurrentPos(chunk); + this._runParsingLoop(); + } + //Hibernation + _ensureHibernation() { + if (this.preprocessor.endOfChunkHit) { + this._unconsume(this.consumedAfterSnapshot); + this.active = false; + return true; + } + return false; + } + //Consumption + _consume() { + this.consumedAfterSnapshot++; + return this.preprocessor.advance(); + } + _unconsume(count) { + this.consumedAfterSnapshot -= count; + this.preprocessor.retreat(count); + } + _reconsumeInState(state, cp) { + this.state = state; + this._callState(cp); + } + _advanceBy(count) { + this.consumedAfterSnapshot += count; + for (let i = 0; i < count; i++) { + this.preprocessor.advance(); + } + } + _consumeSequenceIfMatch(pattern, caseSensitive) { + if (this.preprocessor.startsWith(pattern, caseSensitive)) { + // We will already have consumed one character before calling this method. + this._advanceBy(pattern.length - 1); + return true; + } + return false; + } + //Token creation + _createStartTagToken() { + this.currentToken = { + type: TokenType.START_TAG, + tagName: '', + tagID: TAG_ID.UNKNOWN, + selfClosing: false, + ackSelfClosing: false, + attrs: [], + location: this.getCurrentLocation(1), + }; + } + _createEndTagToken() { + this.currentToken = { + type: TokenType.END_TAG, + tagName: '', + tagID: TAG_ID.UNKNOWN, + selfClosing: false, + ackSelfClosing: false, + attrs: [], + location: this.getCurrentLocation(2), + }; + } + _createCommentToken(offset) { + this.currentToken = { + type: TokenType.COMMENT, + data: '', + location: this.getCurrentLocation(offset), + }; + } + _createDoctypeToken(initialName) { + this.currentToken = { + type: TokenType.DOCTYPE, + name: initialName, + forceQuirks: false, + publicId: null, + systemId: null, + location: this.currentLocation, + }; + } + _createCharacterToken(type, chars) { + this.currentCharacterToken = { + type, + chars, + location: 
this.currentLocation, + }; + } + //Tag attributes + _createAttr(attrNameFirstCh) { + this.currentAttr = { + name: attrNameFirstCh, + value: '', + }; + this.currentLocation = this.getCurrentLocation(0); + } + _leaveAttrName() { + var _a; + var _b; + const token = this.currentToken; + if (getTokenAttr(token, this.currentAttr.name) === null) { + token.attrs.push(this.currentAttr); + if (token.location && this.currentLocation) { + const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null))); + attrLocations[this.currentAttr.name] = this.currentLocation; + // Set end location + this._leaveAttrValue(); + } + } + else { + this._err(ERR.duplicateAttribute); + } + } + _leaveAttrValue() { + if (this.currentLocation) { + this.currentLocation.endLine = this.preprocessor.line; + this.currentLocation.endCol = this.preprocessor.col; + this.currentLocation.endOffset = this.preprocessor.offset; + } + } + //Token emission + prepareToken(ct) { + this._emitCurrentCharacterToken(ct.location); + this.currentToken = null; + if (ct.location) { + ct.location.endLine = this.preprocessor.line; + ct.location.endCol = this.preprocessor.col + 1; + ct.location.endOffset = this.preprocessor.offset + 1; + } + this.currentLocation = this.getCurrentLocation(-1); + } + emitCurrentTagToken() { + const ct = this.currentToken; + this.prepareToken(ct); + ct.tagID = getTagID(ct.tagName); + if (ct.type === TokenType.START_TAG) { + this.lastStartTagName = ct.tagName; + this.handler.onStartTag(ct); + } + else { + if (ct.attrs.length > 0) { + this._err(ERR.endTagWithAttributes); + } + if (ct.selfClosing) { + this._err(ERR.endTagWithTrailingSolidus); + } + this.handler.onEndTag(ct); + } + this.preprocessor.dropParsedChunk(); + } + emitCurrentComment(ct) { + this.prepareToken(ct); + this.handler.onComment(ct); + this.preprocessor.dropParsedChunk(); + } + emitCurrentDoctype(ct) { + this.prepareToken(ct); + this.handler.onDoctype(ct); + 
this.preprocessor.dropParsedChunk(); + } + _emitCurrentCharacterToken(nextLocation) { + if (this.currentCharacterToken) { + //NOTE: if we have a pending character token, make it's end location equal to the + //current token's start location. + if (nextLocation && this.currentCharacterToken.location) { + this.currentCharacterToken.location.endLine = nextLocation.startLine; + this.currentCharacterToken.location.endCol = nextLocation.startCol; + this.currentCharacterToken.location.endOffset = nextLocation.startOffset; + } + switch (this.currentCharacterToken.type) { + case TokenType.CHARACTER: { + this.handler.onCharacter(this.currentCharacterToken); + break; + } + case TokenType.NULL_CHARACTER: { + this.handler.onNullCharacter(this.currentCharacterToken); + break; + } + case TokenType.WHITESPACE_CHARACTER: { + this.handler.onWhitespaceCharacter(this.currentCharacterToken); + break; + } + } + this.currentCharacterToken = null; + } + } + _emitEOFToken() { + const location = this.getCurrentLocation(0); + if (location) { + location.endLine = location.startLine; + location.endCol = location.startCol; + location.endOffset = location.startOffset; + } + this._emitCurrentCharacterToken(location); + this.handler.onEof({ type: TokenType.EOF, location }); + this.active = false; + } + //Characters emission + //OPTIMIZATION: specification uses only one type of character tokens (one token per character). + //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters. + //If we have a sequence of characters that belong to the same group, the parser can process it + //as a single solid character token. + //So, there are 3 types of character tokens in parse5: + //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000') + //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. 
'\n \r\t \f') + //3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^') + _appendCharToCurrentCharacterToken(type, ch) { + if (this.currentCharacterToken) { + if (this.currentCharacterToken.type !== type) { + this.currentLocation = this.getCurrentLocation(0); + this._emitCurrentCharacterToken(this.currentLocation); + this.preprocessor.dropParsedChunk(); + } + else { + this.currentCharacterToken.chars += ch; + return; + } + } + this._createCharacterToken(type, ch); + } + _emitCodePoint(cp) { + const type = isWhitespace(cp) + ? TokenType.WHITESPACE_CHARACTER + : cp === $.NULL + ? TokenType.NULL_CHARACTER + : TokenType.CHARACTER; + this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp)); + } + //NOTE: used when we emit characters explicitly. + //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks. + _emitChars(ch) { + this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch); + } + // Character reference helpers + _matchNamedCharacterReference(cp) { + let result = null; + let excess = 0; + let withoutSemicolon = false; + for (let i = 0, current = htmlDecodeTree[0]; i >= 0; cp = this._consume()) { + i = determineBranch(htmlDecodeTree, current, i + 1, cp); + if (i < 0) + break; + excess += 1; + current = htmlDecodeTree[i]; + const masked = current & BinTrieFlags.VALUE_LENGTH; + // If the branch is a value, store it and continue + if (masked) { + // The mask is the number of bytes of the value, including the current byte. + const valueLength = (masked >> 14) - 1; + // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error. 
+ // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state + if (cp !== $.SEMICOLON && + this._isCharacterReferenceInAttribute() && + isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) { + //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after + //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes. + result = [$.AMPERSAND]; + // Skip over the value. + i += valueLength; + } + else { + // If this is a surrogate pair, consume the next two bytes. + result = + valueLength === 0 + ? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH] + : valueLength === 1 + ? [htmlDecodeTree[++i]] + : [htmlDecodeTree[++i], htmlDecodeTree[++i]]; + excess = 0; + withoutSemicolon = cp !== $.SEMICOLON; + } + if (valueLength === 0) { + // If the value is zero-length, we're done. + this._consume(); + break; + } + } + } + this._unconsume(excess); + if (withoutSemicolon && !this.preprocessor.endOfChunkHit) { + this._err(ERR.missingSemicolonAfterCharacterReference); + } + // We want to emit the error above on the code point after the entity. + // We always consume one code point too many in the loop, and we wait to + // unconsume it until after the error is emitted. + this._unconsume(1); + return result; + } + _isCharacterReferenceInAttribute() { + return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED || + this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED || + this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED); + } + _flushCodePointConsumedAsCharacterReference(cp) { + if (this._isCharacterReferenceInAttribute()) { + this.currentAttr.value += String.fromCodePoint(cp); + } + else { + this._emitCodePoint(cp); + } + } + // Calling states this way turns out to be much faster than any other approach. 
+ _callState(cp) { + switch (this.state) { + case State.DATA: { + this._stateData(cp); + break; + } + case State.RCDATA: { + this._stateRcdata(cp); + break; + } + case State.RAWTEXT: { + this._stateRawtext(cp); + break; + } + case State.SCRIPT_DATA: { + this._stateScriptData(cp); + break; + } + case State.PLAINTEXT: { + this._statePlaintext(cp); + break; + } + case State.TAG_OPEN: { + this._stateTagOpen(cp); + break; + } + case State.END_TAG_OPEN: { + this._stateEndTagOpen(cp); + break; + } + case State.TAG_NAME: { + this._stateTagName(cp); + break; + } + case State.RCDATA_LESS_THAN_SIGN: { + this._stateRcdataLessThanSign(cp); + break; + } + case State.RCDATA_END_TAG_OPEN: { + this._stateRcdataEndTagOpen(cp); + break; + } + case State.RCDATA_END_TAG_NAME: { + this._stateRcdataEndTagName(cp); + break; + } + case State.RAWTEXT_LESS_THAN_SIGN: { + this._stateRawtextLessThanSign(cp); + break; + } + case State.RAWTEXT_END_TAG_OPEN: { + this._stateRawtextEndTagOpen(cp); + break; + } + case State.RAWTEXT_END_TAG_NAME: { + this._stateRawtextEndTagName(cp); + break; + } + case State.SCRIPT_DATA_LESS_THAN_SIGN: { + this._stateScriptDataLessThanSign(cp); + break; + } + case State.SCRIPT_DATA_END_TAG_OPEN: { + this._stateScriptDataEndTagOpen(cp); + break; + } + case State.SCRIPT_DATA_END_TAG_NAME: { + this._stateScriptDataEndTagName(cp); + break; + } + case State.SCRIPT_DATA_ESCAPE_START: { + this._stateScriptDataEscapeStart(cp); + break; + } + case State.SCRIPT_DATA_ESCAPE_START_DASH: { + this._stateScriptDataEscapeStartDash(cp); + break; + } + case State.SCRIPT_DATA_ESCAPED: { + this._stateScriptDataEscaped(cp); + break; + } + case State.SCRIPT_DATA_ESCAPED_DASH: { + this._stateScriptDataEscapedDash(cp); + break; + } + case State.SCRIPT_DATA_ESCAPED_DASH_DASH: { + this._stateScriptDataEscapedDashDash(cp); + break; + } + case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { + this._stateScriptDataEscapedLessThanSign(cp); + break; + } + case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: 
{ + this._stateScriptDataEscapedEndTagOpen(cp); + break; + } + case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: { + this._stateScriptDataEscapedEndTagName(cp); + break; + } + case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: { + this._stateScriptDataDoubleEscapeStart(cp); + break; + } + case State.SCRIPT_DATA_DOUBLE_ESCAPED: { + this._stateScriptDataDoubleEscaped(cp); + break; + } + case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { + this._stateScriptDataDoubleEscapedDash(cp); + break; + } + case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { + this._stateScriptDataDoubleEscapedDashDash(cp); + break; + } + case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { + this._stateScriptDataDoubleEscapedLessThanSign(cp); + break; + } + case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: { + this._stateScriptDataDoubleEscapeEnd(cp); + break; + } + case State.BEFORE_ATTRIBUTE_NAME: { + this._stateBeforeAttributeName(cp); + break; + } + case State.ATTRIBUTE_NAME: { + this._stateAttributeName(cp); + break; + } + case State.AFTER_ATTRIBUTE_NAME: { + this._stateAfterAttributeName(cp); + break; + } + case State.BEFORE_ATTRIBUTE_VALUE: { + this._stateBeforeAttributeValue(cp); + break; + } + case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: { + this._stateAttributeValueDoubleQuoted(cp); + break; + } + case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: { + this._stateAttributeValueSingleQuoted(cp); + break; + } + case State.ATTRIBUTE_VALUE_UNQUOTED: { + this._stateAttributeValueUnquoted(cp); + break; + } + case State.AFTER_ATTRIBUTE_VALUE_QUOTED: { + this._stateAfterAttributeValueQuoted(cp); + break; + } + case State.SELF_CLOSING_START_TAG: { + this._stateSelfClosingStartTag(cp); + break; + } + case State.BOGUS_COMMENT: { + this._stateBogusComment(cp); + break; + } + case State.MARKUP_DECLARATION_OPEN: { + this._stateMarkupDeclarationOpen(cp); + break; + } + case State.COMMENT_START: { + this._stateCommentStart(cp); + break; + } + case State.COMMENT_START_DASH: { + this._stateCommentStartDash(cp); + break; + } + case 
State.COMMENT: { + this._stateComment(cp); + break; + } + case State.COMMENT_LESS_THAN_SIGN: { + this._stateCommentLessThanSign(cp); + break; + } + case State.COMMENT_LESS_THAN_SIGN_BANG: { + this._stateCommentLessThanSignBang(cp); + break; + } + case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: { + this._stateCommentLessThanSignBangDash(cp); + break; + } + case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: { + this._stateCommentLessThanSignBangDashDash(cp); + break; + } + case State.COMMENT_END_DASH: { + this._stateCommentEndDash(cp); + break; + } + case State.COMMENT_END: { + this._stateCommentEnd(cp); + break; + } + case State.COMMENT_END_BANG: { + this._stateCommentEndBang(cp); + break; + } + case State.DOCTYPE: { + this._stateDoctype(cp); + break; + } + case State.BEFORE_DOCTYPE_NAME: { + this._stateBeforeDoctypeName(cp); + break; + } + case State.DOCTYPE_NAME: { + this._stateDoctypeName(cp); + break; + } + case State.AFTER_DOCTYPE_NAME: { + this._stateAfterDoctypeName(cp); + break; + } + case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: { + this._stateAfterDoctypePublicKeyword(cp); + break; + } + case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { + this._stateBeforeDoctypePublicIdentifier(cp); + break; + } + case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { + this._stateDoctypePublicIdentifierDoubleQuoted(cp); + break; + } + case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { + this._stateDoctypePublicIdentifierSingleQuoted(cp); + break; + } + case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { + this._stateAfterDoctypePublicIdentifier(cp); + break; + } + case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { + this._stateBetweenDoctypePublicAndSystemIdentifiers(cp); + break; + } + case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: { + this._stateAfterDoctypeSystemKeyword(cp); + break; + } + case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { + this._stateBeforeDoctypeSystemIdentifier(cp); + break; + } + case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { + 
this._stateDoctypeSystemIdentifierDoubleQuoted(cp); + break; + } + case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { + this._stateDoctypeSystemIdentifierSingleQuoted(cp); + break; + } + case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { + this._stateAfterDoctypeSystemIdentifier(cp); + break; + } + case State.BOGUS_DOCTYPE: { + this._stateBogusDoctype(cp); + break; + } + case State.CDATA_SECTION: { + this._stateCdataSection(cp); + break; + } + case State.CDATA_SECTION_BRACKET: { + this._stateCdataSectionBracket(cp); + break; + } + case State.CDATA_SECTION_END: { + this._stateCdataSectionEnd(cp); + break; + } + case State.CHARACTER_REFERENCE: { + this._stateCharacterReference(cp); + break; + } + case State.NAMED_CHARACTER_REFERENCE: { + this._stateNamedCharacterReference(cp); + break; + } + case State.AMBIGUOUS_AMPERSAND: { + this._stateAmbiguousAmpersand(cp); + break; + } + case State.NUMERIC_CHARACTER_REFERENCE: { + this._stateNumericCharacterReference(cp); + break; + } + case State.HEXADEMICAL_CHARACTER_REFERENCE_START: { + this._stateHexademicalCharacterReferenceStart(cp); + break; + } + case State.HEXADEMICAL_CHARACTER_REFERENCE: { + this._stateHexademicalCharacterReference(cp); + break; + } + case State.DECIMAL_CHARACTER_REFERENCE: { + this._stateDecimalCharacterReference(cp); + break; + } + case State.NUMERIC_CHARACTER_REFERENCE_END: { + this._stateNumericCharacterReferenceEnd(cp); + break; + } + default: { + throw new Error('Unknown state'); + } + } + } + // State machine + // Data state + //------------------------------------------------------------------ + _stateData(cp) { + switch (cp) { + case $.LESS_THAN_SIGN: { + this.state = State.TAG_OPEN; + break; + } + case $.AMPERSAND: { + this.returnState = State.DATA; + this.state = State.CHARACTER_REFERENCE; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitCodePoint(cp); + break; + } + case $.EOF: { + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + 
} + } + } + // RCDATA state + //------------------------------------------------------------------ + _stateRcdata(cp) { + switch (cp) { + case $.AMPERSAND: { + this.returnState = State.RCDATA; + this.state = State.CHARACTER_REFERENCE; + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.RCDATA_LESS_THAN_SIGN; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // RAWTEXT state + //------------------------------------------------------------------ + _stateRawtext(cp) { + switch (cp) { + case $.LESS_THAN_SIGN: { + this.state = State.RAWTEXT_LESS_THAN_SIGN; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // Script data state + //------------------------------------------------------------------ + _stateScriptData(cp) { + switch (cp) { + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_LESS_THAN_SIGN; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // PLAINTEXT state + //------------------------------------------------------------------ + _statePlaintext(cp) { + switch (cp) { + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // Tag open state + //------------------------------------------------------------------ + _stateTagOpen(cp) { + if (isAsciiLetter(cp)) { + this._createStartTagToken(); + this.state = State.TAG_NAME; + 
this._stateTagName(cp); + } + else + switch (cp) { + case $.EXCLAMATION_MARK: { + this.state = State.MARKUP_DECLARATION_OPEN; + break; + } + case $.SOLIDUS: { + this.state = State.END_TAG_OPEN; + break; + } + case $.QUESTION_MARK: { + this._err(ERR.unexpectedQuestionMarkInsteadOfTagName); + this._createCommentToken(1); + this.state = State.BOGUS_COMMENT; + this._stateBogusComment(cp); + break; + } + case $.EOF: { + this._err(ERR.eofBeforeTagName); + this._emitChars('<'); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.invalidFirstCharacterOfTagName); + this._emitChars('<'); + this.state = State.DATA; + this._stateData(cp); + } + } + } + // End tag open state + //------------------------------------------------------------------ + _stateEndTagOpen(cp) { + if (isAsciiLetter(cp)) { + this._createEndTagToken(); + this.state = State.TAG_NAME; + this._stateTagName(cp); + } + else + switch (cp) { + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingEndTagName); + this.state = State.DATA; + break; + } + case $.EOF: { + this._err(ERR.eofBeforeTagName); + this._emitChars('</'); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.invalidFirstCharacterOfTagName); + this._createCommentToken(2); + this.state = State.BOGUS_COMMENT; + this._stateBogusComment(cp); + } + } + } + // Tag name state + //------------------------------------------------------------------ + _stateTagName(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this.state = State.BEFORE_ATTRIBUTE_NAME; + break; + } + case $.SOLIDUS: { + this.state = State.SELF_CLOSING_START_TAG; + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentTagToken(); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.tagName += REPLACEMENT_CHARACTER; + break; + } + case $.EOF: { + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + 
token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); + } + } + } + // RCDATA less-than sign state + //------------------------------------------------------------------ + _stateRcdataLessThanSign(cp) { + if (cp === $.SOLIDUS) { + this.state = State.RCDATA_END_TAG_OPEN; + } + else { + this._emitChars('<'); + this.state = State.RCDATA; + this._stateRcdata(cp); + } + } + // RCDATA end tag open state + //------------------------------------------------------------------ + _stateRcdataEndTagOpen(cp) { + if (isAsciiLetter(cp)) { + this.state = State.RCDATA_END_TAG_NAME; + this._stateRcdataEndTagName(cp); + } + else { + this._emitChars('</'); + this.state = State.RCDATA; + this._stateRcdata(cp); + } + } + handleSpecialEndTag(_cp) { + if (!this.preprocessor.startsWith(this.lastStartTagName, false)) { + return !this._ensureHibernation(); + } + this._createEndTagToken(); + const token = this.currentToken; + token.tagName = this.lastStartTagName; + const cp = this.preprocessor.peek(this.lastStartTagName.length); + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this._advanceBy(this.lastStartTagName.length); + this.state = State.BEFORE_ATTRIBUTE_NAME; + return false; + } + case $.SOLIDUS: { + this._advanceBy(this.lastStartTagName.length); + this.state = State.SELF_CLOSING_START_TAG; + return false; + } + case $.GREATER_THAN_SIGN: { + this._advanceBy(this.lastStartTagName.length); + this.emitCurrentTagToken(); + this.state = State.DATA; + return false; + } + default: { + return !this._ensureHibernation(); + } + } + } + // RCDATA end tag name state + //------------------------------------------------------------------ + _stateRcdataEndTagName(cp) { + if (this.handleSpecialEndTag(cp)) { + this._emitChars('</'); + this.state = State.RCDATA; + this._stateRcdata(cp); + } + } + // RAWTEXT less-than sign state + //------------------------------------------------------------------ + _stateRawtextLessThanSign(cp) 
{ + if (cp === $.SOLIDUS) { + this.state = State.RAWTEXT_END_TAG_OPEN; + } + else { + this._emitChars('<'); + this.state = State.RAWTEXT; + this._stateRawtext(cp); + } + } + // RAWTEXT end tag open state + //------------------------------------------------------------------ + _stateRawtextEndTagOpen(cp) { + if (isAsciiLetter(cp)) { + this.state = State.RAWTEXT_END_TAG_NAME; + this._stateRawtextEndTagName(cp); + } + else { + this._emitChars('</'); + this.state = State.RAWTEXT; + this._stateRawtext(cp); + } + } + // RAWTEXT end tag name state + //------------------------------------------------------------------ + _stateRawtextEndTagName(cp) { + if (this.handleSpecialEndTag(cp)) { + this._emitChars('</'); + this.state = State.RAWTEXT; + this._stateRawtext(cp); + } + } + // Script data less-than sign state + //------------------------------------------------------------------ + _stateScriptDataLessThanSign(cp) { + switch (cp) { + case $.SOLIDUS: { + this.state = State.SCRIPT_DATA_END_TAG_OPEN; + break; + } + case $.EXCLAMATION_MARK: { + this.state = State.SCRIPT_DATA_ESCAPE_START; + this._emitChars('<!'); + break; + } + default: { + this._emitChars('<'); + this.state = State.SCRIPT_DATA; + this._stateScriptData(cp); + } + } + } + // Script data end tag open state + //------------------------------------------------------------------ + _stateScriptDataEndTagOpen(cp) { + if (isAsciiLetter(cp)) { + this.state = State.SCRIPT_DATA_END_TAG_NAME; + this._stateScriptDataEndTagName(cp); + } + else { + this._emitChars('</'); + this.state = State.SCRIPT_DATA; + this._stateScriptData(cp); + } + } + // Script data end tag name state + //------------------------------------------------------------------ + _stateScriptDataEndTagName(cp) { + if (this.handleSpecialEndTag(cp)) { + this._emitChars('</'); + this.state = State.SCRIPT_DATA; + this._stateScriptData(cp); + } + } + // Script data escape start state + //------------------------------------------------------------------ + 
_stateScriptDataEscapeStart(cp) { + if (cp === $.HYPHEN_MINUS) { + this.state = State.SCRIPT_DATA_ESCAPE_START_DASH; + this._emitChars('-'); + } + else { + this.state = State.SCRIPT_DATA; + this._stateScriptData(cp); + } + } + // Script data escape start dash state + //------------------------------------------------------------------ + _stateScriptDataEscapeStartDash(cp) { + if (cp === $.HYPHEN_MINUS) { + this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; + this._emitChars('-'); + } + else { + this.state = State.SCRIPT_DATA; + this._stateScriptData(cp); + } + } + // Script data escaped state + //------------------------------------------------------------------ + _stateScriptDataEscaped(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.SCRIPT_DATA_ESCAPED_DASH; + this._emitChars('-'); + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._err(ERR.eofInScriptHtmlCommentLikeText); + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // Script data escaped dash state + //------------------------------------------------------------------ + _stateScriptDataEscapedDash(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH; + this._emitChars('-'); + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.state = State.SCRIPT_DATA_ESCAPED; + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._err(ERR.eofInScriptHtmlCommentLikeText); + this._emitEOFToken(); + break; + } + default: { + this.state = State.SCRIPT_DATA_ESCAPED; + this._emitCodePoint(cp); + } + } + } + // Script data escaped dash dash state + 
//------------------------------------------------------------------ + _stateScriptDataEscapedDashDash(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this._emitChars('-'); + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.SCRIPT_DATA; + this._emitChars('>'); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.state = State.SCRIPT_DATA_ESCAPED; + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._err(ERR.eofInScriptHtmlCommentLikeText); + this._emitEOFToken(); + break; + } + default: { + this.state = State.SCRIPT_DATA_ESCAPED; + this._emitCodePoint(cp); + } + } + } + // Script data escaped less-than sign state + //------------------------------------------------------------------ + _stateScriptDataEscapedLessThanSign(cp) { + if (cp === $.SOLIDUS) { + this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN; + } + else if (isAsciiLetter(cp)) { + this._emitChars('<'); + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START; + this._stateScriptDataDoubleEscapeStart(cp); + } + else { + this._emitChars('<'); + this.state = State.SCRIPT_DATA_ESCAPED; + this._stateScriptDataEscaped(cp); + } + } + // Script data escaped end tag open state + //------------------------------------------------------------------ + _stateScriptDataEscapedEndTagOpen(cp) { + if (isAsciiLetter(cp)) { + this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME; + this._stateScriptDataEscapedEndTagName(cp); + } + else { + this._emitChars('</'); + this.state = State.SCRIPT_DATA_ESCAPED; + this._stateScriptDataEscaped(cp); + } + } + // Script data escaped end tag name state + //------------------------------------------------------------------ + _stateScriptDataEscapedEndTagName(cp) { + if (this.handleSpecialEndTag(cp)) { + this._emitChars('</'); + this.state = State.SCRIPT_DATA_ESCAPED; + this._stateScriptDataEscaped(cp); + } + } + // Script data 
double escape start state + //------------------------------------------------------------------ + _stateScriptDataDoubleEscapeStart(cp) { + if (this.preprocessor.startsWith($$.SCRIPT, false) && + isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) { + this._emitCodePoint(cp); + for (let i = 0; i < $$.SCRIPT.length; i++) { + this._emitCodePoint(this._consume()); + } + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; + } + else if (!this._ensureHibernation()) { + this.state = State.SCRIPT_DATA_ESCAPED; + this._stateScriptDataEscaped(cp); + } + } + // Script data double escaped state + //------------------------------------------------------------------ + _stateScriptDataDoubleEscaped(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH; + this._emitChars('-'); + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; + this._emitChars('<'); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._err(ERR.eofInScriptHtmlCommentLikeText); + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // Script data double escaped dash state + //------------------------------------------------------------------ + _stateScriptDataDoubleEscapedDash(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH; + this._emitChars('-'); + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; + this._emitChars('<'); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._err(ERR.eofInScriptHtmlCommentLikeText); + this._emitEOFToken(); + break; + } + default: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; 
+ this._emitCodePoint(cp); + } + } + } + // Script data double escaped dash dash state + //------------------------------------------------------------------ + _stateScriptDataDoubleEscapedDashDash(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this._emitChars('-'); + break; + } + case $.LESS_THAN_SIGN: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; + this._emitChars('<'); + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.SCRIPT_DATA; + this._emitChars('>'); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; + this._emitChars(REPLACEMENT_CHARACTER); + break; + } + case $.EOF: { + this._err(ERR.eofInScriptHtmlCommentLikeText); + this._emitEOFToken(); + break; + } + default: { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; + this._emitCodePoint(cp); + } + } + } + // Script data double escaped less-than sign state + //------------------------------------------------------------------ + _stateScriptDataDoubleEscapedLessThanSign(cp) { + if (cp === $.SOLIDUS) { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END; + this._emitChars('/'); + } + else { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; + this._stateScriptDataDoubleEscaped(cp); + } + } + // Script data double escape end state + //------------------------------------------------------------------ + _stateScriptDataDoubleEscapeEnd(cp) { + if (this.preprocessor.startsWith($$.SCRIPT, false) && + isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek($$.SCRIPT.length))) { + this._emitCodePoint(cp); + for (let i = 0; i < $$.SCRIPT.length; i++) { + this._emitCodePoint(this._consume()); + } + this.state = State.SCRIPT_DATA_ESCAPED; + } + else if (!this._ensureHibernation()) { + this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; + this._stateScriptDataDoubleEscaped(cp); + } + } + // Before attribute name state + //------------------------------------------------------------------ + _stateBeforeAttributeName(cp) { + 
switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.SOLIDUS: + case $.GREATER_THAN_SIGN: + case $.EOF: { + this.state = State.AFTER_ATTRIBUTE_NAME; + this._stateAfterAttributeName(cp); + break; + } + case $.EQUALS_SIGN: { + this._err(ERR.unexpectedEqualsSignBeforeAttributeName); + this._createAttr('='); + this.state = State.ATTRIBUTE_NAME; + break; + } + default: { + this._createAttr(''); + this.state = State.ATTRIBUTE_NAME; + this._stateAttributeName(cp); + } + } + } + // Attribute name state + //------------------------------------------------------------------ + _stateAttributeName(cp) { + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: + case $.SOLIDUS: + case $.GREATER_THAN_SIGN: + case $.EOF: { + this._leaveAttrName(); + this.state = State.AFTER_ATTRIBUTE_NAME; + this._stateAfterAttributeName(cp); + break; + } + case $.EQUALS_SIGN: { + this._leaveAttrName(); + this.state = State.BEFORE_ATTRIBUTE_VALUE; + break; + } + case $.QUOTATION_MARK: + case $.APOSTROPHE: + case $.LESS_THAN_SIGN: { + this._err(ERR.unexpectedCharacterInAttributeName); + this.currentAttr.name += String.fromCodePoint(cp); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.currentAttr.name += REPLACEMENT_CHARACTER; + break; + } + default: { + this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? 
toAsciiLower(cp) : cp); + } + } + } + // After attribute name state + //------------------------------------------------------------------ + _stateAfterAttributeName(cp) { + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.SOLIDUS: { + this.state = State.SELF_CLOSING_START_TAG; + break; + } + case $.EQUALS_SIGN: { + this.state = State.BEFORE_ATTRIBUTE_VALUE; + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentTagToken(); + break; + } + case $.EOF: { + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + this._createAttr(''); + this.state = State.ATTRIBUTE_NAME; + this._stateAttributeName(cp); + } + } + } + // Before attribute value state + //------------------------------------------------------------------ + _stateBeforeAttributeValue(cp) { + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.QUOTATION_MARK: { + this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingAttributeValue); + this.state = State.DATA; + this.emitCurrentTagToken(); + break; + } + default: { + this.state = State.ATTRIBUTE_VALUE_UNQUOTED; + this._stateAttributeValueUnquoted(cp); + } + } + } + // Attribute value (double-quoted) state + //------------------------------------------------------------------ + _stateAttributeValueDoubleQuoted(cp) { + switch (cp) { + case $.QUOTATION_MARK: { + this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; + break; + } + case $.AMPERSAND: { + this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED; + this.state = State.CHARACTER_REFERENCE; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.currentAttr.value += REPLACEMENT_CHARACTER; + break; + } + case $.EOF: 
{ + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + this.currentAttr.value += String.fromCodePoint(cp); + } + } + } + // Attribute value (single-quoted) state + //------------------------------------------------------------------ + _stateAttributeValueSingleQuoted(cp) { + switch (cp) { + case $.APOSTROPHE: { + this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED; + break; + } + case $.AMPERSAND: { + this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED; + this.state = State.CHARACTER_REFERENCE; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.currentAttr.value += REPLACEMENT_CHARACTER; + break; + } + case $.EOF: { + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + this.currentAttr.value += String.fromCodePoint(cp); + } + } + } + // Attribute value (unquoted) state + //------------------------------------------------------------------ + _stateAttributeValueUnquoted(cp) { + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this._leaveAttrValue(); + this.state = State.BEFORE_ATTRIBUTE_NAME; + break; + } + case $.AMPERSAND: { + this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED; + this.state = State.CHARACTER_REFERENCE; + break; + } + case $.GREATER_THAN_SIGN: { + this._leaveAttrValue(); + this.state = State.DATA; + this.emitCurrentTagToken(); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this.currentAttr.value += REPLACEMENT_CHARACTER; + break; + } + case $.QUOTATION_MARK: + case $.APOSTROPHE: + case $.LESS_THAN_SIGN: + case $.EQUALS_SIGN: + case $.GRAVE_ACCENT: { + this._err(ERR.unexpectedCharacterInUnquotedAttributeValue); + this.currentAttr.value += String.fromCodePoint(cp); + break; + } + case $.EOF: { + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + this.currentAttr.value += String.fromCodePoint(cp); + } + } + } + // After attribute value (quoted) state + 
//------------------------------------------------------------------ + _stateAfterAttributeValueQuoted(cp) { + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this._leaveAttrValue(); + this.state = State.BEFORE_ATTRIBUTE_NAME; + break; + } + case $.SOLIDUS: { + this._leaveAttrValue(); + this.state = State.SELF_CLOSING_START_TAG; + break; + } + case $.GREATER_THAN_SIGN: { + this._leaveAttrValue(); + this.state = State.DATA; + this.emitCurrentTagToken(); + break; + } + case $.EOF: { + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingWhitespaceBetweenAttributes); + this.state = State.BEFORE_ATTRIBUTE_NAME; + this._stateBeforeAttributeName(cp); + } + } + } + // Self-closing start tag state + //------------------------------------------------------------------ + _stateSelfClosingStartTag(cp) { + switch (cp) { + case $.GREATER_THAN_SIGN: { + const token = this.currentToken; + token.selfClosing = true; + this.state = State.DATA; + this.emitCurrentTagToken(); + break; + } + case $.EOF: { + this._err(ERR.eofInTag); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.unexpectedSolidusInTag); + this.state = State.BEFORE_ATTRIBUTE_NAME; + this._stateBeforeAttributeName(cp); + } + } + } + // Bogus comment state + //------------------------------------------------------------------ + _stateBogusComment(cp) { + const token = this.currentToken; + switch (cp) { + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentComment(token); + break; + } + case $.EOF: { + this.emitCurrentComment(token); + this._emitEOFToken(); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.data += REPLACEMENT_CHARACTER; + break; + } + default: { + token.data += String.fromCodePoint(cp); + } + } + } + // Markup declaration open state + //------------------------------------------------------------------ + _stateMarkupDeclarationOpen(cp) { + if 
(this._consumeSequenceIfMatch($$.DASH_DASH, true)) { + this._createCommentToken($$.DASH_DASH.length + 1); + this.state = State.COMMENT_START; + } + else if (this._consumeSequenceIfMatch($$.DOCTYPE, false)) { + // NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here. + this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1); + this.state = State.DOCTYPE; + } + else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) { + if (this.inForeignNode) { + this.state = State.CDATA_SECTION; + } + else { + this._err(ERR.cdataInHtmlContent); + this._createCommentToken($$.CDATA_START.length + 1); + this.currentToken.data = '[CDATA['; + this.state = State.BOGUS_COMMENT; + } + } + //NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup + //results are no longer valid and we will need to start over. + else if (!this._ensureHibernation()) { + this._err(ERR.incorrectlyOpenedComment); + this._createCommentToken(2); + this.state = State.BOGUS_COMMENT; + this._stateBogusComment(cp); + } + } + // Comment start state + //------------------------------------------------------------------ + _stateCommentStart(cp) { + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.COMMENT_START_DASH; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.abruptClosingOfEmptyComment); + this.state = State.DATA; + const token = this.currentToken; + this.emitCurrentComment(token); + break; + } + default: { + this.state = State.COMMENT; + this._stateComment(cp); + } + } + } + // Comment start dash state + //------------------------------------------------------------------ + _stateCommentStartDash(cp) { + const token = this.currentToken; + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.COMMENT_END; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.abruptClosingOfEmptyComment); + this.state = State.DATA; + this.emitCurrentComment(token); + break; + } + case $.EOF: { + 
this._err(ERR.eofInComment); + this.emitCurrentComment(token); + this._emitEOFToken(); + break; + } + default: { + token.data += '-'; + this.state = State.COMMENT; + this._stateComment(cp); + } + } + } + // Comment state + //------------------------------------------------------------------ + _stateComment(cp) { + const token = this.currentToken; + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.COMMENT_END_DASH; + break; + } + case $.LESS_THAN_SIGN: { + token.data += '<'; + this.state = State.COMMENT_LESS_THAN_SIGN; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.data += REPLACEMENT_CHARACTER; + break; + } + case $.EOF: { + this._err(ERR.eofInComment); + this.emitCurrentComment(token); + this._emitEOFToken(); + break; + } + default: { + token.data += String.fromCodePoint(cp); + } + } + } + // Comment less-than sign state + //------------------------------------------------------------------ + _stateCommentLessThanSign(cp) { + const token = this.currentToken; + switch (cp) { + case $.EXCLAMATION_MARK: { + token.data += '!'; + this.state = State.COMMENT_LESS_THAN_SIGN_BANG; + break; + } + case $.LESS_THAN_SIGN: { + token.data += '<'; + break; + } + default: { + this.state = State.COMMENT; + this._stateComment(cp); + } + } + } + // Comment less-than sign bang state + //------------------------------------------------------------------ + _stateCommentLessThanSignBang(cp) { + if (cp === $.HYPHEN_MINUS) { + this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH; + } + else { + this.state = State.COMMENT; + this._stateComment(cp); + } + } + // Comment less-than sign bang dash state + //------------------------------------------------------------------ + _stateCommentLessThanSignBangDash(cp) { + if (cp === $.HYPHEN_MINUS) { + this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH; + } + else { + this.state = State.COMMENT_END_DASH; + this._stateCommentEndDash(cp); + } + } + // Comment less-than sign bang dash dash state + 
//------------------------------------------------------------------ + _stateCommentLessThanSignBangDashDash(cp) { + if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) { + this._err(ERR.nestedComment); + } + this.state = State.COMMENT_END; + this._stateCommentEnd(cp); + } + // Comment end dash state + //------------------------------------------------------------------ + _stateCommentEndDash(cp) { + const token = this.currentToken; + switch (cp) { + case $.HYPHEN_MINUS: { + this.state = State.COMMENT_END; + break; + } + case $.EOF: { + this._err(ERR.eofInComment); + this.emitCurrentComment(token); + this._emitEOFToken(); + break; + } + default: { + token.data += '-'; + this.state = State.COMMENT; + this._stateComment(cp); + } + } + } + // Comment end state + //------------------------------------------------------------------ + _stateCommentEnd(cp) { + const token = this.currentToken; + switch (cp) { + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentComment(token); + break; + } + case $.EXCLAMATION_MARK: { + this.state = State.COMMENT_END_BANG; + break; + } + case $.HYPHEN_MINUS: { + token.data += '-'; + break; + } + case $.EOF: { + this._err(ERR.eofInComment); + this.emitCurrentComment(token); + this._emitEOFToken(); + break; + } + default: { + token.data += '--'; + this.state = State.COMMENT; + this._stateComment(cp); + } + } + } + // Comment end bang state + //------------------------------------------------------------------ + _stateCommentEndBang(cp) { + const token = this.currentToken; + switch (cp) { + case $.HYPHEN_MINUS: { + token.data += '--!'; + this.state = State.COMMENT_END_DASH; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.incorrectlyClosedComment); + this.state = State.DATA; + this.emitCurrentComment(token); + break; + } + case $.EOF: { + this._err(ERR.eofInComment); + this.emitCurrentComment(token); + this._emitEOFToken(); + break; + } + default: { + token.data += '--!'; + this.state = State.COMMENT; + 
this._stateComment(cp); + } + } + } + // DOCTYPE state + //------------------------------------------------------------------ + _stateDoctype(cp) { + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this.state = State.BEFORE_DOCTYPE_NAME; + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.BEFORE_DOCTYPE_NAME; + this._stateBeforeDoctypeName(cp); + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + this._createDoctypeToken(null); + const token = this.currentToken; + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingWhitespaceBeforeDoctypeName); + this.state = State.BEFORE_DOCTYPE_NAME; + this._stateBeforeDoctypeName(cp); + } + } + } + // Before DOCTYPE name state + //------------------------------------------------------------------ + _stateBeforeDoctypeName(cp) { + if (isAsciiUpper(cp)) { + this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp))); + this.state = State.DOCTYPE_NAME; + } + else + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + this._createDoctypeToken(REPLACEMENT_CHARACTER); + this.state = State.DOCTYPE_NAME; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingDoctypeName); + this._createDoctypeToken(null); + const token = this.currentToken; + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + this._createDoctypeToken(null); + const token = this.currentToken; + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._createDoctypeToken(String.fromCodePoint(cp)); + this.state = State.DOCTYPE_NAME; + } + } + } + // DOCTYPE name state + 
//------------------------------------------------------------------ + _stateDoctypeName(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this.state = State.AFTER_DOCTYPE_NAME; + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.name += REPLACEMENT_CHARACTER; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp); + } + } + } + // After DOCTYPE name state + //------------------------------------------------------------------ + _stateAfterDoctypeName(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + if (this._consumeSequenceIfMatch($$.PUBLIC, false)) { + this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD; + } + else if (this._consumeSequenceIfMatch($$.SYSTEM, false)) { + this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD; + } + //NOTE: sequence lookup can be abrupted by hibernation. In that case lookup + //results are no longer valid and we will need to start over. 
+ else if (!this._ensureHibernation()) { + this._err(ERR.invalidCharacterSequenceAfterDoctypeName); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + } + // After DOCTYPE public keyword state + //------------------------------------------------------------------ + _stateAfterDoctypePublicKeyword(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; + break; + } + case $.QUOTATION_MARK: { + this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); + token.publicId = ''; + this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword); + token.publicId = ''; + this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingDoctypePublicIdentifier); + token.forceQuirks = true; + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // Before DOCTYPE public identifier state + //------------------------------------------------------------------ + _stateBeforeDoctypePublicIdentifier(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.QUOTATION_MARK: { + token.publicId = ''; + this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + token.publicId = ''; + this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + 
break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingDoctypePublicIdentifier); + token.forceQuirks = true; + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // DOCTYPE public identifier (double-quoted) state + //------------------------------------------------------------------ + _stateDoctypePublicIdentifierDoubleQuoted(cp) { + const token = this.currentToken; + switch (cp) { + case $.QUOTATION_MARK: { + this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.publicId += REPLACEMENT_CHARACTER; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.abruptDoctypePublicIdentifier); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + token.publicId += String.fromCodePoint(cp); + } + } + } + // DOCTYPE public identifier (single-quoted) state + //------------------------------------------------------------------ + _stateDoctypePublicIdentifierSingleQuoted(cp) { + const token = this.currentToken; + switch (cp) { + case $.APOSTROPHE: { + this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.publicId += REPLACEMENT_CHARACTER; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.abruptDoctypePublicIdentifier); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.EOF: { + 
this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + token.publicId += String.fromCodePoint(cp); + } + } + } + // After DOCTYPE public identifier state + //------------------------------------------------------------------ + _stateAfterDoctypePublicIdentifier(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; + break; + } + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.QUOTATION_MARK: { + this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); + token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers); + token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // Between DOCTYPE public and system identifiers state + //------------------------------------------------------------------ + _stateBetweenDoctypePublicAndSystemIdentifiers(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.GREATER_THAN_SIGN: { + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.QUOTATION_MARK: { + token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + 
token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // After DOCTYPE system keyword state + //------------------------------------------------------------------ + _stateAfterDoctypeSystemKeyword(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; + break; + } + case $.QUOTATION_MARK: { + this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); + token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword); + token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingDoctypeSystemIdentifier); + token.forceQuirks = true; + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // Before DOCTYPE system identifier state + //------------------------------------------------------------------ + _stateBeforeDoctypeSystemIdentifier(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.QUOTATION_MARK: { + 
token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + } + case $.APOSTROPHE: { + token.systemId = ''; + this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.missingDoctypeSystemIdentifier); + token.forceQuirks = true; + this.state = State.DATA; + this.emitCurrentDoctype(token); + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier); + token.forceQuirks = true; + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // DOCTYPE system identifier (double-quoted) state + //------------------------------------------------------------------ + _stateDoctypeSystemIdentifierDoubleQuoted(cp) { + const token = this.currentToken; + switch (cp) { + case $.QUOTATION_MARK: { + this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.systemId += REPLACEMENT_CHARACTER; + break; + } + case $.GREATER_THAN_SIGN: { + this._err(ERR.abruptDoctypeSystemIdentifier); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + token.systemId += String.fromCodePoint(cp); + } + } + } + // DOCTYPE system identifier (single-quoted) state + //------------------------------------------------------------------ + _stateDoctypeSystemIdentifierSingleQuoted(cp) { + const token = this.currentToken; + switch (cp) { + case $.APOSTROPHE: { + this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + token.systemId += REPLACEMENT_CHARACTER; + break; + } + case 
$.GREATER_THAN_SIGN: { + this._err(ERR.abruptDoctypeSystemIdentifier); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + token.systemId += String.fromCodePoint(cp); + } + } + } + // After DOCTYPE system identifier state + //------------------------------------------------------------------ + _stateAfterDoctypeSystemIdentifier(cp) { + const token = this.currentToken; + switch (cp) { + case $.SPACE: + case $.LINE_FEED: + case $.TABULATION: + case $.FORM_FEED: { + // Ignore whitespace + break; + } + case $.GREATER_THAN_SIGN: { + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.EOF: { + this._err(ERR.eofInDoctype); + token.forceQuirks = true; + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: { + this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier); + this.state = State.BOGUS_DOCTYPE; + this._stateBogusDoctype(cp); + } + } + } + // Bogus DOCTYPE state + //------------------------------------------------------------------ + _stateBogusDoctype(cp) { + const token = this.currentToken; + switch (cp) { + case $.GREATER_THAN_SIGN: { + this.emitCurrentDoctype(token); + this.state = State.DATA; + break; + } + case $.NULL: { + this._err(ERR.unexpectedNullCharacter); + break; + } + case $.EOF: { + this.emitCurrentDoctype(token); + this._emitEOFToken(); + break; + } + default: + // Do nothing + } + } + // CDATA section state + //------------------------------------------------------------------ + _stateCdataSection(cp) { + switch (cp) { + case $.RIGHT_SQUARE_BRACKET: { + this.state = State.CDATA_SECTION_BRACKET; + break; + } + case $.EOF: { + this._err(ERR.eofInCdata); + this._emitEOFToken(); + break; + } + default: { + this._emitCodePoint(cp); + } + } + } + // CDATA section bracket state + 
//------------------------------------------------------------------ + _stateCdataSectionBracket(cp) { + if (cp === $.RIGHT_SQUARE_BRACKET) { + this.state = State.CDATA_SECTION_END; + } + else { + this._emitChars(']'); + this.state = State.CDATA_SECTION; + this._stateCdataSection(cp); + } + } + // CDATA section end state + //------------------------------------------------------------------ + _stateCdataSectionEnd(cp) { + switch (cp) { + case $.GREATER_THAN_SIGN: { + this.state = State.DATA; + break; + } + case $.RIGHT_SQUARE_BRACKET: { + this._emitChars(']'); + break; + } + default: { + this._emitChars(']]'); + this.state = State.CDATA_SECTION; + this._stateCdataSection(cp); + } + } + } + // Character reference state + //------------------------------------------------------------------ + _stateCharacterReference(cp) { + if (cp === $.NUMBER_SIGN) { + this.state = State.NUMERIC_CHARACTER_REFERENCE; + } + else if (isAsciiAlphaNumeric(cp)) { + this.state = State.NAMED_CHARACTER_REFERENCE; + this._stateNamedCharacterReference(cp); + } + else { + this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); + this._reconsumeInState(this.returnState, cp); + } + } + // Named character reference state + //------------------------------------------------------------------ + _stateNamedCharacterReference(cp) { + const matchResult = this._matchNamedCharacterReference(cp); + //NOTE: Matching can be abrupted by hibernation. In that case, match + //results are no longer valid and we will need to start over. + if (this._ensureHibernation()) { + // Stay in the state, try again. 
+ } + else if (matchResult) { + for (let i = 0; i < matchResult.length; i++) { + this._flushCodePointConsumedAsCharacterReference(matchResult[i]); + } + this.state = this.returnState; + } + else { + this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); + this.state = State.AMBIGUOUS_AMPERSAND; + } + } + // Ambiguos ampersand state + //------------------------------------------------------------------ + _stateAmbiguousAmpersand(cp) { + if (isAsciiAlphaNumeric(cp)) { + this._flushCodePointConsumedAsCharacterReference(cp); + } + else { + if (cp === $.SEMICOLON) { + this._err(ERR.unknownNamedCharacterReference); + } + this._reconsumeInState(this.returnState, cp); + } + } + // Numeric character reference state + //------------------------------------------------------------------ + _stateNumericCharacterReference(cp) { + this.charRefCode = 0; + if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) { + this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START; + } + // Inlined decimal character reference start state + else if (isAsciiDigit(cp)) { + this.state = State.DECIMAL_CHARACTER_REFERENCE; + this._stateDecimalCharacterReference(cp); + } + else { + this._err(ERR.absenceOfDigitsInNumericCharacterReference); + this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); + this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN); + this._reconsumeInState(this.returnState, cp); + } + } + // Hexademical character reference start state + //------------------------------------------------------------------ + _stateHexademicalCharacterReferenceStart(cp) { + if (isAsciiHexDigit(cp)) { + this.state = State.HEXADEMICAL_CHARACTER_REFERENCE; + this._stateHexademicalCharacterReference(cp); + } + else { + this._err(ERR.absenceOfDigitsInNumericCharacterReference); + this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); + this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN); + this._unconsume(2); + this.state = this.returnState; + } + } + // 
Hexademical character reference state + //------------------------------------------------------------------ + _stateHexademicalCharacterReference(cp) { + if (isAsciiUpperHexDigit(cp)) { + this.charRefCode = this.charRefCode * 16 + cp - 0x37; + } + else if (isAsciiLowerHexDigit(cp)) { + this.charRefCode = this.charRefCode * 16 + cp - 0x57; + } + else if (isAsciiDigit(cp)) { + this.charRefCode = this.charRefCode * 16 + cp - 0x30; + } + else if (cp === $.SEMICOLON) { + this.state = State.NUMERIC_CHARACTER_REFERENCE_END; + } + else { + this._err(ERR.missingSemicolonAfterCharacterReference); + this.state = State.NUMERIC_CHARACTER_REFERENCE_END; + this._stateNumericCharacterReferenceEnd(cp); + } + } + // Decimal character reference state + //------------------------------------------------------------------ + _stateDecimalCharacterReference(cp) { + if (isAsciiDigit(cp)) { + this.charRefCode = this.charRefCode * 10 + cp - 0x30; + } + else if (cp === $.SEMICOLON) { + this.state = State.NUMERIC_CHARACTER_REFERENCE_END; + } + else { + this._err(ERR.missingSemicolonAfterCharacterReference); + this.state = State.NUMERIC_CHARACTER_REFERENCE_END; + this._stateNumericCharacterReferenceEnd(cp); + } + } + // Numeric character reference end state + //------------------------------------------------------------------ + _stateNumericCharacterReferenceEnd(cp) { + if (this.charRefCode === $.NULL) { + this._err(ERR.nullCharacterReference); + this.charRefCode = $.REPLACEMENT_CHARACTER; + } + else if (this.charRefCode > 1114111) { + this._err(ERR.characterReferenceOutsideUnicodeRange); + this.charRefCode = $.REPLACEMENT_CHARACTER; + } + else if (isSurrogate(this.charRefCode)) { + this._err(ERR.surrogateCharacterReference); + this.charRefCode = $.REPLACEMENT_CHARACTER; + } + else if (isUndefinedCodePoint(this.charRefCode)) { + this._err(ERR.noncharacterCharacterReference); + } + else if (isControlCodePoint(this.charRefCode) || this.charRefCode === $.CARRIAGE_RETURN) { + 
this._err(ERR.controlCharacterReference); + const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode); + if (replacement !== undefined) { + this.charRefCode = replacement; + } + } + this._flushCodePointConsumedAsCharacterReference(this.charRefCode); + this._reconsumeInState(this.returnState, cp); + } +} +//# sourceMappingURL=index.js.map
\ No newline at end of file |