diff options
Diffstat (limited to 'includes/external/addressbook/node_modules/htmlparser2/lib/Tokenizer.js')
-rw-r--r-- | includes/external/addressbook/node_modules/htmlparser2/lib/Tokenizer.js | 938 |
1 files changed, 0 insertions, 938 deletions
diff --git a/includes/external/addressbook/node_modules/htmlparser2/lib/Tokenizer.js b/includes/external/addressbook/node_modules/htmlparser2/lib/Tokenizer.js deleted file mode 100644 index 43863cc..0000000 --- a/includes/external/addressbook/node_modules/htmlparser2/lib/Tokenizer.js +++ /dev/null @@ -1,938 +0,0 @@ -"use strict"; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.QuoteType = void 0; -var decode_js_1 = require("entities/lib/decode.js"); -var CharCodes; -(function (CharCodes) { - CharCodes[CharCodes["Tab"] = 9] = "Tab"; - CharCodes[CharCodes["NewLine"] = 10] = "NewLine"; - CharCodes[CharCodes["FormFeed"] = 12] = "FormFeed"; - CharCodes[CharCodes["CarriageReturn"] = 13] = "CarriageReturn"; - CharCodes[CharCodes["Space"] = 32] = "Space"; - CharCodes[CharCodes["ExclamationMark"] = 33] = "ExclamationMark"; - CharCodes[CharCodes["Number"] = 35] = "Number"; - CharCodes[CharCodes["Amp"] = 38] = "Amp"; - CharCodes[CharCodes["SingleQuote"] = 39] = "SingleQuote"; - CharCodes[CharCodes["DoubleQuote"] = 34] = "DoubleQuote"; - CharCodes[CharCodes["Dash"] = 45] = "Dash"; - CharCodes[CharCodes["Slash"] = 47] = "Slash"; - CharCodes[CharCodes["Zero"] = 48] = "Zero"; - CharCodes[CharCodes["Nine"] = 57] = "Nine"; - CharCodes[CharCodes["Semi"] = 59] = "Semi"; - CharCodes[CharCodes["Lt"] = 60] = "Lt"; - CharCodes[CharCodes["Eq"] = 61] = "Eq"; - CharCodes[CharCodes["Gt"] = 62] = "Gt"; - CharCodes[CharCodes["Questionmark"] = 63] = "Questionmark"; - CharCodes[CharCodes["UpperA"] = 65] = "UpperA"; - CharCodes[CharCodes["LowerA"] = 97] = "LowerA"; - CharCodes[CharCodes["UpperF"] = 70] = "UpperF"; - CharCodes[CharCodes["LowerF"] = 102] = "LowerF"; - CharCodes[CharCodes["UpperZ"] = 90] = "UpperZ"; - CharCodes[CharCodes["LowerZ"] = 122] = "LowerZ"; - CharCodes[CharCodes["LowerX"] = 120] = "LowerX"; - CharCodes[CharCodes["OpeningSquareBracket"] = 91] = "OpeningSquareBracket"; -})(CharCodes || (CharCodes = {})); -/** All the states the tokenizer can be in. */ -var State; -(function (State) { - State[State["Text"] = 1] = "Text"; - State[State["BeforeTagName"] = 2] = "BeforeTagName"; - State[State["InTagName"] = 3] = "InTagName"; - State[State["InSelfClosingTag"] = 4] = "InSelfClosingTag"; - State[State["BeforeClosingTagName"] = 5] = "BeforeClosingTagName"; - State[State["InClosingTagName"] = 6] = "InClosingTagName"; - State[State["AfterClosingTagName"] = 7] = "AfterClosingTagName"; - // Attributes - State[State["BeforeAttributeName"] = 8] = "BeforeAttributeName"; - State[State["InAttributeName"] = 9] = "InAttributeName"; - State[State["AfterAttributeName"] = 10] = "AfterAttributeName"; - State[State["BeforeAttributeValue"] = 11] = "BeforeAttributeValue"; - State[State["InAttributeValueDq"] = 12] = "InAttributeValueDq"; - State[State["InAttributeValueSq"] = 13] = "InAttributeValueSq"; - State[State["InAttributeValueNq"] = 14] = "InAttributeValueNq"; - // Declarations - State[State["BeforeDeclaration"] = 15] = "BeforeDeclaration"; - State[State["InDeclaration"] = 16] = "InDeclaration"; - // Processing instructions - State[State["InProcessingInstruction"] = 17] = "InProcessingInstruction"; - // Comments & CDATA - State[State["BeforeComment"] = 18] = "BeforeComment"; - State[State["CDATASequence"] = 19] = "CDATASequence"; - State[State["InSpecialComment"] = 20] = "InSpecialComment"; - State[State["InCommentLike"] = 21] = "InCommentLike"; - // Special tags - State[State["BeforeSpecialS"] = 22] = "BeforeSpecialS"; - State[State["SpecialStartSequence"] = 23] = "SpecialStartSequence"; - State[State["InSpecialTag"] = 24] = "InSpecialTag"; - State[State["BeforeEntity"] = 25] = "BeforeEntity"; - State[State["BeforeNumericEntity"] = 26] = "BeforeNumericEntity"; - State[State["InNamedEntity"] = 27] = "InNamedEntity"; - State[State["InNumericEntity"] = 28] = "InNumericEntity"; - State[State["InHexEntity"] = 29] = "InHexEntity"; -})(State || (State = {})); -function isWhitespace(c) { - return (c === CharCodes.Space || - c === CharCodes.NewLine || - c === CharCodes.Tab || - c === CharCodes.FormFeed || - c === CharCodes.CarriageReturn); -} -function isEndOfTagSection(c) { - return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c); -} -function isNumber(c) { - return c >= CharCodes.Zero && c <= CharCodes.Nine; -} -function isASCIIAlpha(c) { - return ((c >= CharCodes.LowerA && c <= CharCodes.LowerZ) || - (c >= CharCodes.UpperA && c <= CharCodes.UpperZ)); -} -function isHexDigit(c) { - return ((c >= CharCodes.UpperA && c <= CharCodes.UpperF) || - (c >= CharCodes.LowerA && c <= CharCodes.LowerF)); -} -var QuoteType; -(function (QuoteType) { - QuoteType[QuoteType["NoValue"] = 0] = "NoValue"; - QuoteType[QuoteType["Unquoted"] = 1] = "Unquoted"; - QuoteType[QuoteType["Single"] = 2] = "Single"; - QuoteType[QuoteType["Double"] = 3] = "Double"; -})(QuoteType = exports.QuoteType || (exports.QuoteType = {})); -/** - * Sequences used to match longer strings. - * - * We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End - * sequences with an increased offset. - */ -var Sequences = { - Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), - CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), - CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), - ScriptEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74]), - StyleEnd: new Uint8Array([0x3c, 0x2f, 0x73, 0x74, 0x79, 0x6c, 0x65]), - TitleEnd: new Uint8Array([0x3c, 0x2f, 0x74, 0x69, 0x74, 0x6c, 0x65]), // `</title` -}; -var Tokenizer = /** @class */ (function () { - function Tokenizer(_a, cbs) { - var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b, _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c; - this.cbs = cbs; - /** The current state the tokenizer is in. */ - this.state = State.Text; - /** The read buffer. */ - this.buffer = ""; - /** The beginning of the section that is currently being read. */ - this.sectionStart = 0; - /** The index within the buffer that we are currently looking at. */ - this.index = 0; - /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ - this.baseState = State.Text; - /** For special parsing behavior inside of script and style tags. */ - this.isSpecial = false; - /** Indicates whether the tokenizer has been paused. */ - this.running = true; - /** The offset of the current buffer. */ - this.offset = 0; - this.currentSequence = undefined; - this.sequenceIndex = 0; - this.trieIndex = 0; - this.trieCurrent = 0; - /** For named entities, the index of the value. For numeric entities, the code point. */ - this.entityResult = 0; - this.entityExcess = 0; - this.xmlMode = xmlMode; - this.decodeEntities = decodeEntities; - this.entityTrie = xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree; - } - Tokenizer.prototype.reset = function () { - this.state = State.Text; - this.buffer = ""; - this.sectionStart = 0; - this.index = 0; - this.baseState = State.Text; - this.currentSequence = undefined; - this.running = true; - this.offset = 0; - }; - Tokenizer.prototype.write = function (chunk) { - this.offset += this.buffer.length; - this.buffer = chunk; - this.parse(); - }; - Tokenizer.prototype.end = function () { - if (this.running) - this.finish(); - }; - Tokenizer.prototype.pause = function () { - this.running = false; - }; - Tokenizer.prototype.resume = function () { - this.running = true; - if (this.index < this.buffer.length + this.offset) { - this.parse(); - } - }; - /** - * The current index within all of the written data. - */ - Tokenizer.prototype.getIndex = function () { - return this.index; - }; - /** - * The start of the current section. - */ - Tokenizer.prototype.getSectionStart = function () { - return this.sectionStart; - }; - Tokenizer.prototype.stateText = function (c) { - if (c === CharCodes.Lt || - (!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))) { - if (this.index > this.sectionStart) { - this.cbs.ontext(this.sectionStart, this.index); - } - this.state = State.BeforeTagName; - this.sectionStart = this.index; - } - else if (this.decodeEntities && c === CharCodes.Amp) { - this.state = State.BeforeEntity; - } - }; - Tokenizer.prototype.stateSpecialStartSequence = function (c) { - var isEnd = this.sequenceIndex === this.currentSequence.length; - var isMatch = isEnd - ? // If we are at the end of the sequence, make sure the tag name has ended - isEndOfTagSection(c) - : // Otherwise, do a case-insensitive comparison - (c | 0x20) === this.currentSequence[this.sequenceIndex]; - if (!isMatch) { - this.isSpecial = false; - } - else if (!isEnd) { - this.sequenceIndex++; - return; - } - this.sequenceIndex = 0; - this.state = State.InTagName; - this.stateInTagName(c); - }; - /** Look for an end tag. For <title> tags, also decode entities. */ - Tokenizer.prototype.stateInSpecialTag = function (c) { - if (this.sequenceIndex === this.currentSequence.length) { - if (c === CharCodes.Gt || isWhitespace(c)) { - var endOfText = this.index - this.currentSequence.length; - if (this.sectionStart < endOfText) { - // Spoof the index so that reported locations match up. - var actualIndex = this.index; - this.index = endOfText; - this.cbs.ontext(this.sectionStart, endOfText); - this.index = actualIndex; - } - this.isSpecial = false; - this.sectionStart = endOfText + 2; // Skip over the `</` - this.stateInClosingTagName(c); - return; // We are done; skip the rest of the function. - } - this.sequenceIndex = 0; - } - if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) { - this.sequenceIndex += 1; - } - else if (this.sequenceIndex === 0) { - if (this.currentSequence === Sequences.TitleEnd) { - // We have to parse entities in <title> tags. - if (this.decodeEntities && c === CharCodes.Amp) { - this.state = State.BeforeEntity; - } - } - else if (this.fastForwardTo(CharCodes.Lt)) { - // Outside of <title> tags, we can fast-forward. - this.sequenceIndex = 1; - } - } - else { - // If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`. - this.sequenceIndex = Number(c === CharCodes.Lt); - } - }; - Tokenizer.prototype.stateCDATASequence = function (c) { - if (c === Sequences.Cdata[this.sequenceIndex]) { - if (++this.sequenceIndex === Sequences.Cdata.length) { - this.state = State.InCommentLike; - this.currentSequence = Sequences.CdataEnd; - this.sequenceIndex = 0; - this.sectionStart = this.index + 1; - } - } - else { - this.sequenceIndex = 0; - this.state = State.InDeclaration; - this.stateInDeclaration(c); // Reconsume the character - } - }; - /** - * When we wait for one specific character, we can speed things up - * by skipping through the buffer until we find it. - * - * @returns Whether the character was found. - */ - Tokenizer.prototype.fastForwardTo = function (c) { - while (++this.index < this.buffer.length + this.offset) { - if (this.buffer.charCodeAt(this.index - this.offset) === c) { - return true; - } - } - /* - * We increment the index at the end of the `parse` loop, - * so set it to `buffer.length - 1` here. - * - * TODO: Refactor `parse` to increment index before calling states. - */ - this.index = this.buffer.length + this.offset - 1; - return false; - }; - /** - * Comments and CDATA end with `-->` and `]]>`. - * - * Their common qualities are: - * - Their end sequences have a distinct character they start with. - * - That character is then repeated, so we have to check multiple repeats. - * - All characters but the start character of the sequence can be skipped. - */ - Tokenizer.prototype.stateInCommentLike = function (c) { - if (c === this.currentSequence[this.sequenceIndex]) { - if (++this.sequenceIndex === this.currentSequence.length) { - if (this.currentSequence === Sequences.CdataEnd) { - this.cbs.oncdata(this.sectionStart, this.index, 2); - } - else { - this.cbs.oncomment(this.sectionStart, this.index, 2); - } - this.sequenceIndex = 0; - this.sectionStart = this.index + 1; - this.state = State.Text; - } - } - else if (this.sequenceIndex === 0) { - // Fast-forward to the first character of the sequence - if (this.fastForwardTo(this.currentSequence[0])) { - this.sequenceIndex = 1; - } - } - else if (c !== this.currentSequence[this.sequenceIndex - 1]) { - // Allow long sequences, eg. --->, ]]]> - this.sequenceIndex = 0; - } - }; - /** - * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. - * - * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). - * We allow anything that wouldn't end the tag. - */ - Tokenizer.prototype.isTagStartChar = function (c) { - return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c); - }; - Tokenizer.prototype.startSpecial = function (sequence, offset) { - this.isSpecial = true; - this.currentSequence = sequence; - this.sequenceIndex = offset; - this.state = State.SpecialStartSequence; - }; - Tokenizer.prototype.stateBeforeTagName = function (c) { - if (c === CharCodes.ExclamationMark) { - this.state = State.BeforeDeclaration; - this.sectionStart = this.index + 1; - } - else if (c === CharCodes.Questionmark) { - this.state = State.InProcessingInstruction; - this.sectionStart = this.index + 1; - } - else if (this.isTagStartChar(c)) { - var lower = c | 0x20; - this.sectionStart = this.index; - if (!this.xmlMode && lower === Sequences.TitleEnd[2]) { - this.startSpecial(Sequences.TitleEnd, 3); - } - else { - this.state = - !this.xmlMode && lower === Sequences.ScriptEnd[2] - ? State.BeforeSpecialS - : State.InTagName; - } - } - else if (c === CharCodes.Slash) { - this.state = State.BeforeClosingTagName; - } - else { - this.state = State.Text; - this.stateText(c); - } - }; - Tokenizer.prototype.stateInTagName = function (c) { - if (isEndOfTagSection(c)) { - this.cbs.onopentagname(this.sectionStart, this.index); - this.sectionStart = -1; - this.state = State.BeforeAttributeName; - this.stateBeforeAttributeName(c); - } - }; - Tokenizer.prototype.stateBeforeClosingTagName = function (c) { - if (isWhitespace(c)) { - // Ignore - } - else if (c === CharCodes.Gt) { - this.state = State.Text; - } - else { - this.state = this.isTagStartChar(c) - ? State.InClosingTagName - : State.InSpecialComment; - this.sectionStart = this.index; - } - }; - Tokenizer.prototype.stateInClosingTagName = function (c) { - if (c === CharCodes.Gt || isWhitespace(c)) { - this.cbs.onclosetag(this.sectionStart, this.index); - this.sectionStart = -1; - this.state = State.AfterClosingTagName; - this.stateAfterClosingTagName(c); - } - }; - Tokenizer.prototype.stateAfterClosingTagName = function (c) { - // Skip everything until ">" - if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { - this.state = State.Text; - this.baseState = State.Text; - this.sectionStart = this.index + 1; - } - }; - Tokenizer.prototype.stateBeforeAttributeName = function (c) { - if (c === CharCodes.Gt) { - this.cbs.onopentagend(this.index); - if (this.isSpecial) { - this.state = State.InSpecialTag; - this.sequenceIndex = 0; - } - else { - this.state = State.Text; - } - this.baseState = this.state; - this.sectionStart = this.index + 1; - } - else if (c === CharCodes.Slash) { - this.state = State.InSelfClosingTag; - } - else if (!isWhitespace(c)) { - this.state = State.InAttributeName; - this.sectionStart = this.index; - } - }; - Tokenizer.prototype.stateInSelfClosingTag = function (c) { - if (c === CharCodes.Gt) { - this.cbs.onselfclosingtag(this.index); - this.state = State.Text; - this.baseState = State.Text; - this.sectionStart = this.index + 1; - this.isSpecial = false; // Reset special state, in case of self-closing special tags - } - else if (!isWhitespace(c)) { - this.state = State.BeforeAttributeName; - this.stateBeforeAttributeName(c); - } - }; - Tokenizer.prototype.stateInAttributeName = function (c) { - if (c === CharCodes.Eq || isEndOfTagSection(c)) { - this.cbs.onattribname(this.sectionStart, this.index); - this.sectionStart = -1; - this.state = State.AfterAttributeName; - this.stateAfterAttributeName(c); - } - }; - Tokenizer.prototype.stateAfterAttributeName = function (c) { - if (c === CharCodes.Eq) { - this.state = State.BeforeAttributeValue; - } - else if (c === CharCodes.Slash || c === CharCodes.Gt) { - this.cbs.onattribend(QuoteType.NoValue, this.index); - this.state = State.BeforeAttributeName; - this.stateBeforeAttributeName(c); - } - else if (!isWhitespace(c)) { - this.cbs.onattribend(QuoteType.NoValue, this.index); - this.state = State.InAttributeName; - this.sectionStart = this.index; - } - }; - Tokenizer.prototype.stateBeforeAttributeValue = function (c) { - if (c === CharCodes.DoubleQuote) { - this.state = State.InAttributeValueDq; - this.sectionStart = this.index + 1; - } - else if (c === CharCodes.SingleQuote) { - this.state = State.InAttributeValueSq; - this.sectionStart = this.index + 1; - } - else if (!isWhitespace(c)) { - this.sectionStart = this.index; - this.state = State.InAttributeValueNq; - this.stateInAttributeValueNoQuotes(c); // Reconsume token - } - }; - Tokenizer.prototype.handleInAttributeValue = function (c, quote) { - if (c === quote || - (!this.decodeEntities && this.fastForwardTo(quote))) { - this.cbs.onattribdata(this.sectionStart, this.index); - this.sectionStart = -1; - this.cbs.onattribend(quote === CharCodes.DoubleQuote - ? QuoteType.Double - : QuoteType.Single, this.index); - this.state = State.BeforeAttributeName; - } - else if (this.decodeEntities && c === CharCodes.Amp) { - this.baseState = this.state; - this.state = State.BeforeEntity; - } - }; - Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) { - this.handleInAttributeValue(c, CharCodes.DoubleQuote); - }; - Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) { - this.handleInAttributeValue(c, CharCodes.SingleQuote); - }; - Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) { - if (isWhitespace(c) || c === CharCodes.Gt) { - this.cbs.onattribdata(this.sectionStart, this.index); - this.sectionStart = -1; - this.cbs.onattribend(QuoteType.Unquoted, this.index); - this.state = State.BeforeAttributeName; - this.stateBeforeAttributeName(c); - } - else if (this.decodeEntities && c === CharCodes.Amp) { - this.baseState = this.state; - this.state = State.BeforeEntity; - } - }; - Tokenizer.prototype.stateBeforeDeclaration = function (c) { - if (c === CharCodes.OpeningSquareBracket) { - this.state = State.CDATASequence; - this.sequenceIndex = 0; - } - else { - this.state = - c === CharCodes.Dash - ? State.BeforeComment - : State.InDeclaration; - } - }; - Tokenizer.prototype.stateInDeclaration = function (c) { - if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { - this.cbs.ondeclaration(this.sectionStart, this.index); - this.state = State.Text; - this.sectionStart = this.index + 1; - } - }; - Tokenizer.prototype.stateInProcessingInstruction = function (c) { - if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { - this.cbs.onprocessinginstruction(this.sectionStart, this.index); - this.state = State.Text; - this.sectionStart = this.index + 1; - } - }; - Tokenizer.prototype.stateBeforeComment = function (c) { - if (c === CharCodes.Dash) { - this.state = State.InCommentLike; - this.currentSequence = Sequences.CommentEnd; - // Allow short comments (eg. <!-->) - this.sequenceIndex = 2; - this.sectionStart = this.index + 1; - } - else { - this.state = State.InDeclaration; - } - }; - Tokenizer.prototype.stateInSpecialComment = function (c) { - if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) { - this.cbs.oncomment(this.sectionStart, this.index, 0); - this.state = State.Text; - this.sectionStart = this.index + 1; - } - }; - Tokenizer.prototype.stateBeforeSpecialS = function (c) { - var lower = c | 0x20; - if (lower === Sequences.ScriptEnd[3]) { - this.startSpecial(Sequences.ScriptEnd, 4); - } - else if (lower === Sequences.StyleEnd[3]) { - this.startSpecial(Sequences.StyleEnd, 4); - } - else { - this.state = State.InTagName; - this.stateInTagName(c); // Consume the token again - } - }; - Tokenizer.prototype.stateBeforeEntity = function (c) { - // Start excess with 1 to include the '&' - this.entityExcess = 1; - this.entityResult = 0; - if (c === CharCodes.Number) { - this.state = State.BeforeNumericEntity; - } - else if (c === CharCodes.Amp) { - // We have two `&` characters in a row. Stay in the current state. - } - else { - this.trieIndex = 0; - this.trieCurrent = this.entityTrie[0]; - this.state = State.InNamedEntity; - this.stateInNamedEntity(c); - } - }; - Tokenizer.prototype.stateInNamedEntity = function (c) { - this.entityExcess += 1; - this.trieIndex = (0, decode_js_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c); - if (this.trieIndex < 0) { - this.emitNamedEntity(); - this.index--; - return; - } - this.trieCurrent = this.entityTrie[this.trieIndex]; - var masked = this.trieCurrent & decode_js_1.BinTrieFlags.VALUE_LENGTH; - // If the branch is a value, store it and continue - if (masked) { - // The mask is the number of bytes of the value, including the current byte. - var valueLength = (masked >> 14) - 1; - // If we have a legacy entity while parsing strictly, just skip the number of bytes - if (!this.allowLegacyEntity() && c !== CharCodes.Semi) { - this.trieIndex += valueLength; - } - else { - // Add 1 as we have already incremented the excess - var entityStart = this.index - this.entityExcess + 1; - if (entityStart > this.sectionStart) { - this.emitPartial(this.sectionStart, entityStart); - } - // If this is a surrogate pair, consume the next two bytes - this.entityResult = this.trieIndex; - this.trieIndex += valueLength; - this.entityExcess = 0; - this.sectionStart = this.index + 1; - if (valueLength === 0) { - this.emitNamedEntity(); - } - } - } - }; - Tokenizer.prototype.emitNamedEntity = function () { - this.state = this.baseState; - if (this.entityResult === 0) { - return; - } - var valueLength = (this.entityTrie[this.entityResult] & decode_js_1.BinTrieFlags.VALUE_LENGTH) >> - 14; - switch (valueLength) { - case 1: { - this.emitCodePoint(this.entityTrie[this.entityResult] & - ~decode_js_1.BinTrieFlags.VALUE_LENGTH); - break; - } - case 2: { - this.emitCodePoint(this.entityTrie[this.entityResult + 1]); - break; - } - case 3: { - this.emitCodePoint(this.entityTrie[this.entityResult + 1]); - this.emitCodePoint(this.entityTrie[this.entityResult + 2]); - } - } - }; - Tokenizer.prototype.stateBeforeNumericEntity = function (c) { - if ((c | 0x20) === CharCodes.LowerX) { - this.entityExcess++; - this.state = State.InHexEntity; - } - else { - this.state = State.InNumericEntity; - this.stateInNumericEntity(c); - } - }; - Tokenizer.prototype.emitNumericEntity = function (strict) { - var entityStart = this.index - this.entityExcess - 1; - var numberStart = entityStart + 2 + Number(this.state === State.InHexEntity); - if (numberStart !== this.index) { - // Emit leading data if any - if (entityStart > this.sectionStart) { - this.emitPartial(this.sectionStart, entityStart); - } - this.sectionStart = this.index + Number(strict); - this.emitCodePoint((0, decode_js_1.replaceCodePoint)(this.entityResult)); - } - this.state = this.baseState; - }; - Tokenizer.prototype.stateInNumericEntity = function (c) { - if (c === CharCodes.Semi) { - this.emitNumericEntity(true); - } - else if (isNumber(c)) { - this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero); - this.entityExcess++; - } - else { - if (this.allowLegacyEntity()) { - this.emitNumericEntity(false); - } - else { - this.state = this.baseState; - } - this.index--; - } - }; - Tokenizer.prototype.stateInHexEntity = function (c) { - if (c === CharCodes.Semi) { - this.emitNumericEntity(true); - } - else if (isNumber(c)) { - this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero); - this.entityExcess++; - } - else if (isHexDigit(c)) { - this.entityResult = - this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10); - this.entityExcess++; - } - else { - if (this.allowLegacyEntity()) { - this.emitNumericEntity(false); - } - else { - this.state = this.baseState; - } - this.index--; - } - }; - Tokenizer.prototype.allowLegacyEntity = function () { - return (!this.xmlMode && - (this.baseState === State.Text || - this.baseState === State.InSpecialTag)); - }; - /** - * Remove data that has already been consumed from the buffer. - */ - Tokenizer.prototype.cleanup = function () { - // If we are inside of text or attributes, emit what we already have. - if (this.running && this.sectionStart !== this.index) { - if (this.state === State.Text || - (this.state === State.InSpecialTag && this.sequenceIndex === 0)) { - this.cbs.ontext(this.sectionStart, this.index); - this.sectionStart = this.index; - } - else if (this.state === State.InAttributeValueDq || - this.state === State.InAttributeValueSq || - this.state === State.InAttributeValueNq) { - this.cbs.onattribdata(this.sectionStart, this.index); - this.sectionStart = this.index; - } - } - }; - Tokenizer.prototype.shouldContinue = function () { - return this.index < this.buffer.length + this.offset && this.running; - }; - /** - * Iterates through the buffer, calling the function corresponding to the current state. - * - * States that are more likely to be hit are higher up, as a performance improvement. - */ - Tokenizer.prototype.parse = function () { - while (this.shouldContinue()) { - var c = this.buffer.charCodeAt(this.index - this.offset); - switch (this.state) { - case State.Text: { - this.stateText(c); - break; - } - case State.SpecialStartSequence: { - this.stateSpecialStartSequence(c); - break; - } - case State.InSpecialTag: { - this.stateInSpecialTag(c); - break; - } - case State.CDATASequence: { - this.stateCDATASequence(c); - break; - } - case State.InAttributeValueDq: { - this.stateInAttributeValueDoubleQuotes(c); - break; - } - case State.InAttributeName: { - this.stateInAttributeName(c); - break; - } - case State.InCommentLike: { - this.stateInCommentLike(c); - break; - } - case State.InSpecialComment: { - this.stateInSpecialComment(c); - break; - } - case State.BeforeAttributeName: { - this.stateBeforeAttributeName(c); - break; - } - case State.InTagName: { - this.stateInTagName(c); - break; - } - case State.InClosingTagName: { - this.stateInClosingTagName(c); - break; - } - case State.BeforeTagName: { - this.stateBeforeTagName(c); - break; - } - case State.AfterAttributeName: { - this.stateAfterAttributeName(c); - break; - } - case State.InAttributeValueSq: { - this.stateInAttributeValueSingleQuotes(c); - break; - } - case State.BeforeAttributeValue: { - this.stateBeforeAttributeValue(c); - break; - } - case State.BeforeClosingTagName: { - this.stateBeforeClosingTagName(c); - break; - } - case State.AfterClosingTagName: { - this.stateAfterClosingTagName(c); - break; - } - case State.BeforeSpecialS: { - this.stateBeforeSpecialS(c); - break; - } - case State.InAttributeValueNq: { - this.stateInAttributeValueNoQuotes(c); - break; - } - case State.InSelfClosingTag: { - this.stateInSelfClosingTag(c); - break; - } - case State.InDeclaration: { - this.stateInDeclaration(c); - break; - } - case State.BeforeDeclaration: { - this.stateBeforeDeclaration(c); - break; - } - case State.BeforeComment: { - this.stateBeforeComment(c); - break; - } - case State.InProcessingInstruction: { - this.stateInProcessingInstruction(c); - break; - } - case State.InNamedEntity: { - this.stateInNamedEntity(c); - break; - } - case State.BeforeEntity: { - this.stateBeforeEntity(c); - break; - } - case State.InHexEntity: { - this.stateInHexEntity(c); - break; - } - case State.InNumericEntity: { - this.stateInNumericEntity(c); - break; - } - default: { - // `this._state === State.BeforeNumericEntity` - this.stateBeforeNumericEntity(c); - } - } - this.index++; - } - this.cleanup(); - }; - Tokenizer.prototype.finish = function () { - if (this.state === State.InNamedEntity) { - this.emitNamedEntity(); - } - // If there is remaining data, emit it in a reasonable way - if (this.sectionStart < this.index) { - this.handleTrailingData(); - } - this.cbs.onend(); - }; - /** Handle any trailing data. */ - Tokenizer.prototype.handleTrailingData = function () { - var endIndex = this.buffer.length + this.offset; - if (this.state === State.InCommentLike) { - if (this.currentSequence === Sequences.CdataEnd) { - this.cbs.oncdata(this.sectionStart, endIndex, 0); - } - else { - this.cbs.oncomment(this.sectionStart, endIndex, 0); - } - } - else if (this.state === State.InNumericEntity && - this.allowLegacyEntity()) { - this.emitNumericEntity(false); - // All trailing data will have been consumed - } - else if (this.state === State.InHexEntity && - this.allowLegacyEntity()) { - this.emitNumericEntity(false); - // All trailing data will have been consumed - } - else if (this.state === State.InTagName || - this.state === State.BeforeAttributeName || - this.state === State.BeforeAttributeValue || - this.state === State.AfterAttributeName || - this.state === State.InAttributeName || - this.state === State.InAttributeValueSq || - this.state === State.InAttributeValueDq || - this.state === State.InAttributeValueNq || - this.state === State.InClosingTagName) { - /* - * If we are currently in an opening or closing tag, us not calling the - * respective callback signals that the tag should be ignored. - */ - } - else { - this.cbs.ontext(this.sectionStart, endIndex); - } - }; - Tokenizer.prototype.emitPartial = function (start, endIndex) { - if (this.baseState !== State.Text && - this.baseState !== State.InSpecialTag) { - this.cbs.onattribdata(start, endIndex); - } - else { - this.cbs.ontext(start, endIndex); - } - }; - Tokenizer.prototype.emitCodePoint = function (cp) { - if (this.baseState !== State.Text && - this.baseState !== State.InSpecialTag) { - this.cbs.onattribentity(cp); - } - else { - this.cbs.ontextentity(cp); - } - }; - return Tokenizer; -}()); -exports.default = Tokenizer; -//# sourceMappingURL=Tokenizer.js.map
\ No newline at end of file |