From 99c1d9af689e5325f3cf535c4007b3aeb8325229 Mon Sep 17 00:00:00 2001 From: Minteck Date: Tue, 10 Jan 2023 14:54:04 +0100 Subject: Update - This is an automated commit --- school/node_modules/parse5/lib/parser/index.js | 2956 ++++++++++++++++++++++++ 1 file changed, 2956 insertions(+) create mode 100644 school/node_modules/parse5/lib/parser/index.js (limited to 'school/node_modules/parse5/lib/parser/index.js') diff --git a/school/node_modules/parse5/lib/parser/index.js b/school/node_modules/parse5/lib/parser/index.js new file mode 100644 index 0000000..45d3e83 --- /dev/null +++ b/school/node_modules/parse5/lib/parser/index.js @@ -0,0 +1,2956 @@ +'use strict'; + +const Tokenizer = require('../tokenizer'); +const OpenElementStack = require('./open-element-stack'); +const FormattingElementList = require('./formatting-element-list'); +const LocationInfoParserMixin = require('../extensions/location-info/parser-mixin'); +const ErrorReportingParserMixin = require('../extensions/error-reporting/parser-mixin'); +const Mixin = require('../utils/mixin'); +const defaultTreeAdapter = require('../tree-adapters/default'); +const mergeOptions = require('../utils/merge-options'); +const doctype = require('../common/doctype'); +const foreignContent = require('../common/foreign-content'); +const ERR = require('../common/error-codes'); +const unicode = require('../common/unicode'); +const HTML = require('../common/html'); + +//Aliases +const $ = HTML.TAG_NAMES; +const NS = HTML.NAMESPACES; +const ATTRS = HTML.ATTRS; + +const DEFAULT_OPTIONS = { + scriptingEnabled: true, + sourceCodeLocationInfo: false, + onParseError: null, + treeAdapter: defaultTreeAdapter +}; + +//Misc constants +const HIDDEN_INPUT_TYPE = 'hidden'; + +//Adoption agency loops iteration count +const AA_OUTER_LOOP_ITER = 8; +const AA_INNER_LOOP_ITER = 3; + +//Insertion modes +const INITIAL_MODE = 'INITIAL_MODE'; +const BEFORE_HTML_MODE = 'BEFORE_HTML_MODE'; +const BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE'; +const IN_HEAD_MODE = 'IN_HEAD_MODE'; +const IN_HEAD_NO_SCRIPT_MODE = 'IN_HEAD_NO_SCRIPT_MODE'; +const AFTER_HEAD_MODE = 'AFTER_HEAD_MODE'; +const IN_BODY_MODE = 'IN_BODY_MODE'; +const TEXT_MODE = 'TEXT_MODE'; +const IN_TABLE_MODE = 'IN_TABLE_MODE'; +const IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE'; +const IN_CAPTION_MODE = 'IN_CAPTION_MODE'; +const IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE'; +const IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE'; +const IN_ROW_MODE = 'IN_ROW_MODE'; +const IN_CELL_MODE = 'IN_CELL_MODE'; +const IN_SELECT_MODE = 'IN_SELECT_MODE'; +const IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE'; +const IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE'; +const AFTER_BODY_MODE = 'AFTER_BODY_MODE'; +const IN_FRAMESET_MODE = 'IN_FRAMESET_MODE'; +const AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE'; +const AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE'; +const AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE'; + +//Insertion mode reset map +const INSERTION_MODE_RESET_MAP = { + [$.TR]: IN_ROW_MODE, + [$.TBODY]: IN_TABLE_BODY_MODE, + [$.THEAD]: IN_TABLE_BODY_MODE, + [$.TFOOT]: IN_TABLE_BODY_MODE, + [$.CAPTION]: IN_CAPTION_MODE, + [$.COLGROUP]: IN_COLUMN_GROUP_MODE, + [$.TABLE]: IN_TABLE_MODE, + [$.BODY]: IN_BODY_MODE, + [$.FRAMESET]: IN_FRAMESET_MODE +}; + +//Template insertion mode switch map +const TEMPLATE_INSERTION_MODE_SWITCH_MAP = { + [$.CAPTION]: IN_TABLE_MODE, + [$.COLGROUP]: IN_TABLE_MODE, + [$.TBODY]: IN_TABLE_MODE, + [$.TFOOT]: IN_TABLE_MODE, + [$.THEAD]: IN_TABLE_MODE, + [$.COL]: IN_COLUMN_GROUP_MODE, + [$.TR]: IN_TABLE_BODY_MODE, + [$.TD]: IN_ROW_MODE, + [$.TH]: IN_ROW_MODE +}; + +//Token handlers map for insertion modes +const TOKEN_HANDLERS = { + [INITIAL_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenInInitialMode, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInInitialMode, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: doctypeInInitialMode, + [Tokenizer.START_TAG_TOKEN]: tokenInInitialMode, + [Tokenizer.END_TAG_TOKEN]: tokenInInitialMode, + [Tokenizer.EOF_TOKEN]: tokenInInitialMode + }, + [BEFORE_HTML_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHtml, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHtml, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagBeforeHtml, + [Tokenizer.END_TAG_TOKEN]: endTagBeforeHtml, + [Tokenizer.EOF_TOKEN]: tokenBeforeHtml + }, + [BEFORE_HEAD_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHead, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHead, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype, + [Tokenizer.START_TAG_TOKEN]: startTagBeforeHead, + [Tokenizer.END_TAG_TOKEN]: endTagBeforeHead, + [Tokenizer.EOF_TOKEN]: tokenBeforeHead + }, + [IN_HEAD_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenInHead, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHead, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype, + [Tokenizer.START_TAG_TOKEN]: startTagInHead, + [Tokenizer.END_TAG_TOKEN]: endTagInHead, + [Tokenizer.EOF_TOKEN]: tokenInHead + }, + [IN_HEAD_NO_SCRIPT_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenInHeadNoScript, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHeadNoScript, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype, + [Tokenizer.START_TAG_TOKEN]: startTagInHeadNoScript, + [Tokenizer.END_TAG_TOKEN]: endTagInHeadNoScript, + [Tokenizer.EOF_TOKEN]: tokenInHeadNoScript + }, + [AFTER_HEAD_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenAfterHead, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterHead, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype, + [Tokenizer.START_TAG_TOKEN]: startTagAfterHead, + [Tokenizer.END_TAG_TOKEN]: endTagAfterHead, + [Tokenizer.EOF_TOKEN]: tokenAfterHead + }, + [IN_BODY_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInBody, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInBody, + [Tokenizer.END_TAG_TOKEN]: endTagInBody, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [TEXT_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.NULL_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: ignoreToken, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: ignoreToken, + [Tokenizer.END_TAG_TOKEN]: endTagInText, + [Tokenizer.EOF_TOKEN]: eofInText + }, + [IN_TABLE_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInTable, + [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInTable, + [Tokenizer.END_TAG_TOKEN]: endTagInTable, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_TABLE_TEXT_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInTableText, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInTableText, + [Tokenizer.COMMENT_TOKEN]: tokenInTableText, + [Tokenizer.DOCTYPE_TOKEN]: tokenInTableText, + [Tokenizer.START_TAG_TOKEN]: tokenInTableText, + [Tokenizer.END_TAG_TOKEN]: tokenInTableText, + [Tokenizer.EOF_TOKEN]: tokenInTableText + }, + [IN_CAPTION_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInBody, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInCaption, + [Tokenizer.END_TAG_TOKEN]: endTagInCaption, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_COLUMN_GROUP_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenInColumnGroup, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInColumnGroup, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInColumnGroup, + [Tokenizer.END_TAG_TOKEN]: endTagInColumnGroup, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_TABLE_BODY_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInTable, + [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInTableBody, + [Tokenizer.END_TAG_TOKEN]: endTagInTableBody, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_ROW_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInTable, + [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInRow, + [Tokenizer.END_TAG_TOKEN]: endTagInRow, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_CELL_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInBody, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInCell, + [Tokenizer.END_TAG_TOKEN]: endTagInCell, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_SELECT_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInSelect, + [Tokenizer.END_TAG_TOKEN]: endTagInSelect, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_SELECT_IN_TABLE_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInSelectInTable, + [Tokenizer.END_TAG_TOKEN]: endTagInSelectInTable, + [Tokenizer.EOF_TOKEN]: eofInBody + }, + [IN_TEMPLATE_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: characterInBody, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInTemplate, + [Tokenizer.END_TAG_TOKEN]: endTagInTemplate, + [Tokenizer.EOF_TOKEN]: eofInTemplate + }, + [AFTER_BODY_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenAfterBody, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterBody, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendCommentToRootHtmlElement, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagAfterBody, + [Tokenizer.END_TAG_TOKEN]: endTagAfterBody, + [Tokenizer.EOF_TOKEN]: stopParsing + }, + [IN_FRAMESET_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagInFrameset, + [Tokenizer.END_TAG_TOKEN]: endTagInFrameset, + [Tokenizer.EOF_TOKEN]: stopParsing + }, + [AFTER_FRAMESET_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters, + [Tokenizer.COMMENT_TOKEN]: appendComment, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagAfterFrameset, + [Tokenizer.END_TAG_TOKEN]: endTagAfterFrameset, + [Tokenizer.EOF_TOKEN]: stopParsing + }, + [AFTER_AFTER_BODY_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: tokenAfterAfterBody, + [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterAfterBody, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendCommentToDocument, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagAfterAfterBody, + [Tokenizer.END_TAG_TOKEN]: tokenAfterAfterBody, + [Tokenizer.EOF_TOKEN]: stopParsing + }, + [AFTER_AFTER_FRAMESET_MODE]: { + [Tokenizer.CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody, + [Tokenizer.COMMENT_TOKEN]: appendCommentToDocument, + [Tokenizer.DOCTYPE_TOKEN]: ignoreToken, + [Tokenizer.START_TAG_TOKEN]: startTagAfterAfterFrameset, + [Tokenizer.END_TAG_TOKEN]: ignoreToken, + [Tokenizer.EOF_TOKEN]: stopParsing + } +}; + +//Parser +class Parser { + constructor(options) { + this.options = mergeOptions(DEFAULT_OPTIONS, options); + + this.treeAdapter = this.options.treeAdapter; + this.pendingScript = null; + + if (this.options.sourceCodeLocationInfo) { + Mixin.install(this, LocationInfoParserMixin); + } + + if (this.options.onParseError) { + Mixin.install(this, ErrorReportingParserMixin, { onParseError: this.options.onParseError }); + } + } + + // API + parse(html) { + const document = this.treeAdapter.createDocument(); + + this._bootstrap(document, null); + this.tokenizer.write(html, true); + this._runParsingLoop(null); + + return document; + } + + parseFragment(html, fragmentContext) { + //NOTE: use