diff options
author | Minteck <contact@minteck.org> | 2021-12-21 15:25:09 +0100 |
---|---|---|
committer | Minteck <contact@minteck.org> | 2021-12-21 15:25:09 +0100 |
commit | e703e51c9c09b22e3bcda9a1faf1e05897f60616 (patch) | |
tree | 4fd67a209ad6988fbf569d7dff8bc37ba45baf95 /_mint/node_modules/js-beautify/js/src/javascript/tokenizer.js | |
download | mint-e703e51c9c09b22e3bcda9a1faf1e05897f60616.tar.gz mint-e703e51c9c09b22e3bcda9a1faf1e05897f60616.tar.bz2 mint-e703e51c9c09b22e3bcda9a1faf1e05897f60616.zip |
Initial commit
Diffstat (limited to '_mint/node_modules/js-beautify/js/src/javascript/tokenizer.js')
-rw-r--r-- | _mint/node_modules/js-beautify/js/src/javascript/tokenizer.js | 566 |
1 files changed, 566 insertions, 0 deletions
// diff --git a/_mint/node_modules/js-beautify/js/src/javascript/tokenizer.js b/_mint/node_modules/js-beautify/js/src/javascript/tokenizer.js
// new file mode 100644, index 0000000..5710bc1, --- /dev/null +++ b/_mint/node_modules/js-beautify/js/src/javascript/tokenizer.js, @@ -0,0 +1,566 @@
/*jshint node:true */
/*

  The MIT License (MIT)

  Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

'use strict';

var InputScanner = require('../core/inputscanner').InputScanner;
var BaseTokenizer = require('../core/tokenizer').Tokenizer;
var BASETOKEN = require('../core/tokenizer').TOKEN;
var Directives = require('../core/directives').Directives;
var acorn = require('./acorn');
var Pattern = require('../core/pattern').Pattern;
var TemplatablePattern = require('../core/templatablepattern').TemplatablePattern;


// Returns true when `what` is an element of `arr` (strict `indexOf` match).
function in_array(what, arr) {
  return arr.indexOf(what) !== -1;
}


// Token type names produced by this tokenizer.
// START, RAW and EOF are reused from the base tokenizer's TOKEN table.
var TOKEN = {
  START_EXPR: 'TK_START_EXPR',
  END_EXPR: 'TK_END_EXPR',
  START_BLOCK: 'TK_START_BLOCK',
  END_BLOCK: 'TK_END_BLOCK',
  WORD: 'TK_WORD',
  RESERVED: 'TK_RESERVED',
  SEMICOLON: 'TK_SEMICOLON',
  STRING: 'TK_STRING',
  EQUALS: 'TK_EQUALS',
  OPERATOR: 'TK_OPERATOR',
  COMMA: 'TK_COMMA',
  BLOCK_COMMENT: 'TK_BLOCK_COMMENT',
  COMMENT: 'TK_COMMENT',
  DOT: 'TK_DOT',
  UNKNOWN: 'TK_UNKNOWN',
  START: BASETOKEN.START,
  RAW: BASETOKEN.RAW,
  EOF: BASETOKEN.EOF
};


// Directive reader constructed with the block-comment start and end patterns.
var directives_core = new Directives(/\/\*/, /\*\//);

// Alternatives, in order: hex, octal and binary literals (optional trailing `n`),
// decimal BigInt (`123n`), then decimals with optional fraction and exponent.
// `_` is accepted inside digit runs.
var number_pattern = /0[xX][0123456789abcdefABCDEF_]*n?|0[oO][01234567_]*n?|0[bB][01_]*n?|\d[\d_]*n|(?:\.\d[\d_]*|\d[\d_]*\.?[\d_]*)(?:[eE][+-]?[\d_]+)?/;

var digit = /[0-9]/;

// Dot "." must be distinguished from "..." and decimal
var dot_pattern = /[^\d\.]/;

var positionable_operators = (
  ">>> === !== " +
  "<< && >= ** != == <= >> || ?? |> " +
  "< / - + > : & % ? ^ | *").split(' ');

// IMPORTANT: this must be sorted longest to shortest or tokenizing may not work.
// Also, you must update positionable operators separately from punct
var punct =
  ">>>= " +
  "... >>= <<= === >>> !== **= " +
  "=> ^= :: /= << <= == && -= >= >> != -- += ** || ?? ++ %= &= *= |= |> " +
  "= ! ? > < : / ^ - + * & % ~ |";

// Escape regex metacharacters, then turn the space-separated list into an alternation.
punct = punct.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
// ?. but not if followed by a number
punct = '\\?\\.(?!\\d) ' + punct;
punct = punct.replace(/ /g, '|');

var punct_pattern = new RegExp(punct);

// words which should always start on new line.
var line_starters = 'continue,try,throw,return,var,let,const,if,switch,case,default,for,while,break,function,import,export'.split(',');
var reserved_words = line_starters.concat(['do', 'in', 'of', 'else', 'get', 'set', 'new', 'catch', 'finally', 'typeof', 'yield', 'async', 'await', 'from', 'as']);
var reserved_word_pattern = new RegExp('^(?:' + reserved_words.join('|') + ')$');

// var template_pattern = /(?:(?:<\?php|<\?=)[\s\S]*?\?>)|(?:<%[\s\S]*?%>)/g;

// Module-level flag (shared by every Tokenizer instance in this module):
// set after a leading `<!--` is read, cleared by `-->` and by `_reset()`.
var in_html_comment;

// JavaScript tokenizer built on top of the base tokenizer.
//   input_string - text to tokenize (forwarded to BaseTokenizer)
//   options      - options object (forwarded to BaseTokenizer; also read by the templatable patterns)
var Tokenizer = function(input_string, options) {
  BaseTokenizer.call(this, input_string, options);

  // Extend the inherited whitespace pattern with additional Unicode
  // space characters and the Unicode line/paragraph separators.
  this._patterns.whitespace = this._patterns.whitespace.matching(
    /\u00A0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff/.source,
    /\u2028\u2029/.source);

  var pattern_reader = new Pattern(this._input);
  var templatable = new TemplatablePattern(this._input)
    .read_options(this._options);

  this.__patterns = {
    template: templatable,
    identifier: templatable.starting_with(acorn.identifier).matching(acorn.identifierMatch),
    number: pattern_reader.matching(number_pattern),
    punct: pattern_reader.matching(punct_pattern),
    // comment ends just before nearest linefeed or end of file
    comment: pattern_reader.starting_with(/\/\//).until(/[\n\r\u2028\u2029]/),
    //  /* ... */ comment ends with nearest */ or end of file
    block_comment: pattern_reader.starting_with(/\/\*/).until_after(/\*\//),
    html_comment_start: pattern_reader.matching(/<!--/),
    html_comment_end: pattern_reader.matching(/-->/),
    include: pattern_reader.starting_with(/#include/).until_after(acorn.lineBreak),
    shebang: pattern_reader.starting_with(/#!/).until_after(acorn.lineBreak),
    xml: pattern_reader.matching(/[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\]|)(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*('[^']*'|"[^"]*"|{[\s\S]+?}))*\s*(\/?)\s*>/),
    single_quote: templatable.until(/['\\\n\r\u2028\u2029]/),
    double_quote: templatable.until(/["\\\n\r\u2028\u2029]/),
    template_text: templatable.until(/[`\\$]/),
    template_expression: templatable.until(/[`}\\]/)
  };

};
Tokenizer.prototype = new BaseTokenizer();

// True for line comments, block comments and UNKNOWN tokens
// (UNKNOWN is deliberately grouped with the comment types here).
Tokenizer.prototype._is_comment = function(current_token) {
  return current_token.type === TOKEN.COMMENT || current_token.type === TOKEN.BLOCK_COMMENT || current_token.type === TOKEN.UNKNOWN;
};

// True when the token opens a block (`{`) or an expression (`(` / `[`).
Tokenizer.prototype._is_opening = function(current_token) {
  return current_token.type === TOKEN.START_BLOCK || current_token.type === TOKEN.START_EXPR;
};

// Truthy when `current_token` is a closing bracket whose text pairs with
// the text of `open_token`. Falsy when `open_token` is missing.
Tokenizer.prototype._is_closing = function(current_token, open_token) {
  return (current_token.type === TOKEN.END_BLOCK || current_token.type === TOKEN.END_EXPR) &&
    (open_token && (
      (current_token.text === ']' && open_token.text === '[') ||
      (current_token.text === ')' && open_token.text === '(') ||
      (current_token.text === '}' && open_token.text === '{')));
};

// Clears the module-level HTML-comment flag.
Tokenizer.prototype._reset = function() {
  in_html_comment = false;
};

// Reads and returns the next token from the input.
//   previous_token - the token produced immediately before this one
//   open_token     - unused here (see the jshint marker)
// Readers are tried in a fixed order; the first one that yields a token wins.
// When none matches, a single character is consumed as an UNKNOWN token.
Tokenizer.prototype._get_next_token = function(previous_token, open_token) { // jshint unused:false
  var token = null;
  this._readWhitespace();
  var c = this._input.peek();

  if (c === null) {
    return this._create_token(TOKEN.EOF, '');
  }

  token = token || this._read_non_javascript(c);
  token = token || this._read_string(c);
  token = token || this._read_word(previous_token);
  token = token || this._read_singles(c);
  token = token || this._read_comment(c);
  token = token || this._read_regexp(c, previous_token);
  token = token || this._read_xml(c, previous_token);
  token = token || this._read_punctuation();
  token = token || this._create_token(TOKEN.UNKNOWN, this._input.next());

  return token;
};

// Reads an identifier, reserved word or number.
// Returns a RESERVED, OPERATOR ('in' / 'of') or WORD token, or undefined
// when neither the identifier nor the number pattern matched.
// Numbers are emitted as WORD tokens.
Tokenizer.prototype._read_word = function(previous_token) {
  var resulting_string;
  resulting_string = this.__patterns.identifier.read();
  if (resulting_string !== '') {
    resulting_string = resulting_string.replace(acorn.allLineBreaks, '\n');
    // A reserved word after `.`, or after the `get` / `set` keywords,
    // is treated as a plain word (property or accessor name).
    if (!(previous_token.type === TOKEN.DOT ||
        (previous_token.type === TOKEN.RESERVED && (previous_token.text === 'set' || previous_token.text === 'get'))) &&
      reserved_word_pattern.test(resulting_string)) {
      if (resulting_string === 'in' || resulting_string === 'of') { // hack for 'in' and 'of' operators
        return this._create_token(TOKEN.OPERATOR, resulting_string);
      }
      return this._create_token(TOKEN.RESERVED, resulting_string);
    }
    return this._create_token(TOKEN.WORD, resulting_string);
  }

  resulting_string = this.__patterns.number.read();
  if (resulting_string !== '') {
    return this._create_token(TOKEN.WORD, resulting_string);
  }
};

// Reads a single-character structural token: brackets, braces, `;`, `,`
// or a lone `.` (one not followed by a digit or another dot).
// Consumes the character only when a token was created; otherwise returns null.
Tokenizer.prototype._read_singles = function(c) {
  var token = null;
  if (c === '(' || c === '[') {
    token = this._create_token(TOKEN.START_EXPR, c);
  } else if (c === ')' || c === ']') {
    token = this._create_token(TOKEN.END_EXPR, c);
  } else if (c === '{') {
    token = this._create_token(TOKEN.START_BLOCK, c);
  } else if (c === '}') {
    token = this._create_token(TOKEN.END_BLOCK, c);
  } else if (c === ';') {
    token = this._create_token(TOKEN.SEMICOLON, c);
  } else if (c === '.' && dot_pattern.test(this._input.peek(1))) {
    token = this._create_token(TOKEN.DOT, c);
  } else if (c === ',') {
    token = this._create_token(TOKEN.COMMA, c);
  }

  if (token) {
    this._input.next();
  }
  return token;
};

// Reads an operator using the punctuation pattern.
// `=` becomes EQUALS, `?.` becomes DOT, anything else OPERATOR.
// Returns undefined when the pattern did not match.
Tokenizer.prototype._read_punctuation = function() {
  var resulting_string = this.__patterns.punct.read();

  if (resulting_string !== '') {
    if (resulting_string === '=') {
      return this._create_token(TOKEN.EQUALS, resulting_string);
    } else if (resulting_string === '?.') {
      return this._create_token(TOKEN.DOT, resulting_string);
    } else {
      return this._create_token(TOKEN.OPERATOR, resulting_string);
    }
  }
};

// Handles input that is not plain JavaScript: shebang lines (first token
// only), `#include` lines, `#<digits>` sharp variables, and `<!--` / `-->`
// HTML comment markers. Returns a token, or null when nothing matched.
Tokenizer.prototype._read_non_javascript = function(c) {
  var resulting_string = '';

  if (c === '#') {
    if (this._is_first_token()) {
      resulting_string = this.__patterns.shebang.read();

      if (resulting_string) {
        return this._create_token(TOKEN.UNKNOWN, resulting_string.trim() + '\n');
      }
    }

    // handles extendscript #includes
    resulting_string = this.__patterns.include.read();

    if (resulting_string) {
      return this._create_token(TOKEN.UNKNOWN, resulting_string.trim() + '\n');
    }

    c = this._input.next();

    // Spidermonkey-specific sharp variables for circular references. Considered obsolete.
    var sharp = '#';
    if (this._input.hasNext() && this._input.testChar(digit)) {
      do {
        c = this._input.next();
        sharp += c;
      } while (this._input.hasNext() && c !== '#' && c !== '=');
      if (c === '#') {
        //
      } else if (this._input.peek() === '[' && this._input.peek(1) === ']') {
        sharp += '[]';
        this._input.next();
        this._input.next();
      } else if (this._input.peek() === '{' && this._input.peek(1) === '}') {
        sharp += '{}';
        this._input.next();
        this._input.next();
      }
      return this._create_token(TOKEN.WORD, sharp);
    }

    // Not a sharp variable: un-read the '#' so a later reader can handle it.
    this._input.back();

  } else if (c === '<' && this._is_first_token()) {
    resulting_string = this.__patterns.html_comment_start.read();
    if (resulting_string) {
      // Take the rest of the line as part of the comment token.
      while (this._input.hasNext() && !this._input.testChar(acorn.newline)) {
        resulting_string += this._input.next();
      }
      in_html_comment = true;
      return this._create_token(TOKEN.COMMENT, resulting_string);
    }
  } else if (in_html_comment && c === '-') {
    resulting_string = this.__patterns.html_comment_end.read();
    if (resulting_string) {
      in_html_comment = false;
      return this._create_token(TOKEN.COMMENT, resulting_string);
    }
  }

  return null;
};

// Reads a `/* ... */` block comment or a `// ...` line comment.
// Block comments have line breaks normalised to '\n' and carry any parsed
// directives on `token.directives`. Returns null when `c` does not start a comment.
Tokenizer.prototype._read_comment = function(c) {
  var token = null;
  if (c === '/') {
    var comment = '';
    if (this._input.peek(1) === '*') {
      // peek for comment /* ... */
      comment = this.__patterns.block_comment.read();
      var directives = directives_core.get_directives(comment);
      if (directives && directives.ignore === 'start') {
        // An "ignore start" directive: append the ignored text to this comment.
        comment += directives_core.readIgnored(this._input);
      }
      comment = comment.replace(acorn.allLineBreaks, '\n');
      token = this._create_token(TOKEN.BLOCK_COMMENT, comment);
      token.directives = directives;
    } else if (this._input.peek(1) === '/') {
      // peek for comment // ...
      comment = this.__patterns.comment.read();
      token = this._create_token(TOKEN.COMMENT, comment);
    }
  }
  return token;
};

// Reads a quoted string or a template literal starting at `c`.
// Optionally unescapes \x / \u escapes (option `unescape_strings`) and
// normalises line breaks to '\n'. Returns a STRING token, or null when `c`
// is not a quote character.
Tokenizer.prototype._read_string = function(c) {
  if (c === '`' || c === "'" || c === '"') {
    var resulting_string = this._input.next();
    this.has_char_escapes = false;

    if (c === '`') {
      resulting_string += this._read_string_recursive('`', true, '${');
    } else {
      resulting_string += this._read_string_recursive(c);
    }

    if (this.has_char_escapes && this._options.unescape_strings) {
      resulting_string = unescape_string(resulting_string);
    }

    // Append the closing quote when present (an unterminated string has none).
    if (this._input.peek() === c) {
      resulting_string += this._input.next();
    }

    resulting_string = resulting_string.replace(acorn.allLineBreaks, '\n');

    return this._create_token(TOKEN.STRING, resulting_string);
  }

  return null;
};

// Decides whether a regexp literal or XML literal may start after `previous_token`.
// Returns a boolean.
Tokenizer.prototype._allow_regexp_or_xml = function(previous_token) {
  // regex and xml can only appear in specific locations during parsing
  // `opened` is assigned outside this file; a ')' without a recorded opener
  // (e.g. unbalanced input) must not be dereferenced, so guard it.
  var opener = previous_token.opened;
  return (previous_token.type === TOKEN.RESERVED && in_array(previous_token.text, ['return', 'case', 'throw', 'else', 'do', 'typeof', 'yield'])) ||
    (previous_token.type === TOKEN.END_EXPR && previous_token.text === ')' &&
      opener && opener.previous &&
      opener.previous.type === TOKEN.RESERVED && in_array(opener.previous.text, ['if', 'while', 'for'])) ||
    (in_array(previous_token.type, [TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK, TOKEN.START,
      TOKEN.END_BLOCK, TOKEN.OPERATOR, TOKEN.EQUALS, TOKEN.EOF, TOKEN.SEMICOLON, TOKEN.COMMA
    ]));
};

// Reads a regular-expression literal (including trailing flags) when `c`
// is '/' and a regexp is allowed after `previous_token`.
// Returns a STRING token, or null otherwise.
Tokenizer.prototype._read_regexp = function(c, previous_token) {

  if (c === '/' && this._allow_regexp_or_xml(previous_token)) {
    // handle regexp
    //
    var resulting_string = this._input.next();
    var esc = false;

    var in_char_class = false;
    // Scan until an unescaped '/' outside a character class, or a newline.
    while (this._input.hasNext() &&
      ((esc || in_char_class || this._input.peek() !== c) &&
        !this._input.testChar(acorn.newline))) {
      resulting_string += this._input.peek();
      if (!esc) {
        esc = this._input.peek() === '\\';
        if (this._input.peek() === '[') {
          in_char_class = true;
        } else if (this._input.peek() === ']') {
          in_char_class = false;
        }
      } else {
        esc = false;
      }
      this._input.next();
    }

    if (this._input.peek() === c) {
      resulting_string += this._input.next();

      // regexps may have modifiers /regexp/MOD , so fetch those, too
      // Only [gim] are valid, but if the user puts in garbage, do what we can to take it.
      resulting_string += this._input.read(acorn.identifier);
    }
    return this._create_token(TOKEN.STRING, resulting_string);
  }
  return null;
};

// Reads an E4X/XML literal as a single STRING token when the `e4x` option
// is set, `c` is '<' and XML is allowed after `previous_token`.
// Returns null otherwise.
Tokenizer.prototype._read_xml = function(c, previous_token) {

  if (this._options.e4x && c === "<" && this._allow_regexp_or_xml(previous_token)) {
    var xmlStr = '';
    var match = this.__patterns.xml.read_match();
    // handle e4x xml literals
    //
    if (match) {
      // Trim whitespace just inside the braces of a `{...}` root tag so it
      // can be compared against later tag names.
      var rootTag = match[2].replace(/^{\s+/, '{').replace(/\s+}$/, '}');
      var isCurlyRoot = rootTag.indexOf('{') === 0;
      var depth = 0;
      while (match) {
        var isEndTag = !!match[1];
        var tagName = match[2];
        var isSingletonTag = (!!match[match.length - 1]) || (tagName.slice(0, 8) === "![CDATA[");
        // NOTE(review): with a curly root, the second alternative below is
        // truthy for any non-empty tag name (the normalised name is not
        // compared to rootTag). Left as-is to preserve behaviour — confirm intent.
        if (!isSingletonTag &&
          (tagName === rootTag || (isCurlyRoot && tagName.replace(/^{\s+/, '{').replace(/\s+}$/, '}')))) {
          if (isEndTag) {
            --depth;
          } else {
            ++depth;
          }
        }
        xmlStr += match[0];
        if (depth <= 0) {
          break;
        }
        match = this.__patterns.xml.read_match();
      }
      // if we didn't close correctly, keep unformatted.
      if (!match) {
        xmlStr += this._input.match(/[\s\S]*/g)[0];
      }
      xmlStr = xmlStr.replace(acorn.allLineBreaks, '\n');
      return this._create_token(TOKEN.STRING, xmlStr);
    }
  }

  return null;
};

// Converts printable \xNN and \uNNNN escapes in `s` to literal characters.
// Returns the original string unchanged when an escape cannot be decoded or
// when a \x7f..\xff escape is found.
function unescape_string(s) {
  // You think that a regex would work for this
  // return s.replace(/\\x([0-9a-f]{2})/gi, function(match, val) {
  //         return String.fromCharCode(parseInt(val, 16));
  //     })
  // However, dealing with '\xff', '\\xff', '\\\xff' makes this more fun.
  var out = '',
    escaped = 0;

  var input_scan = new InputScanner(s);
  var matched = null;

  while (input_scan.hasNext()) {
    // Keep any whitespace, non-slash characters
    // also keep slash pairs.
    matched = input_scan.match(/([\s]|[^\\]|\\\\)+/g);

    if (matched) {
      out += matched[0];
    }

    if (input_scan.peek() === '\\') {
      input_scan.next();
      if (input_scan.peek() === 'x') {
        matched = input_scan.match(/x([0-9A-Fa-f]{2})/g);
      } else if (input_scan.peek() === 'u') {
        matched = input_scan.match(/u([0-9A-Fa-f]{4})/g);
      } else {
        // Any other escape is copied through untouched.
        out += '\\';
        if (input_scan.hasNext()) {
          out += input_scan.next();
        }
        continue;
      }

      // If there's some error decoding, return the original string
      if (!matched) {
        return s;
      }

      escaped = parseInt(matched[1], 16);

      if (escaped > 0x7e && escaped <= 0xff && matched[0].indexOf('x') === 0) {
        // we bail out on \x7f..\xff,
        // leaving whole string escaped,
        // as it's probably completely binary
        return s;
      } else if (escaped >= 0x00 && escaped < 0x20) {
        // leave 0x00...0x1f escaped
        out += '\\' + matched[0];
        continue;
      } else if (escaped === 0x22 || escaped === 0x27 || escaped === 0x5c) {
        // double-quote, apostrophe, backslash - escape these
        out += '\\' + String.fromCharCode(escaped);
      } else {
        out += String.fromCharCode(escaped);
      }
    }
  }

  return out;
}

// handle string
//
// Reads string content up to (but not including) `delimiter`.
//   delimiter                - "'", '"', '`' or '}' (selects the scanning pattern)
//   allow_unescaped_newlines - when falsy, an unescaped newline ends the string
//   start_sub                - optional marker ('${' or '`') that starts a nested
//                              template expression / template literal
// Sets `this.has_char_escapes` when a \x or \u escape is seen.
// Returns the text read, without the closing delimiter.
Tokenizer.prototype._read_string_recursive = function(delimiter, allow_unescaped_newlines, start_sub) {
  var current_char;
  var pattern;
  if (delimiter === '\'') {
    pattern = this.__patterns.single_quote;
  } else if (delimiter === '"') {
    pattern = this.__patterns.double_quote;
  } else if (delimiter === '`') {
    pattern = this.__patterns.template_text;
  } else if (delimiter === '}') {
    pattern = this.__patterns.template_expression;
  }

  var resulting_string = pattern.read();
  var next = '';
  while (this._input.hasNext()) {
    next = this._input.next();
    if (next === delimiter ||
      (!allow_unescaped_newlines && acorn.newline.test(next))) {
      // Leave the terminator in the input for the caller.
      this._input.back();
      break;
    } else if (next === '\\' && this._input.hasNext()) {
      current_char = this._input.peek();

      if (current_char === 'x' || current_char === 'u') {
        this.has_char_escapes = true;
      } else if (current_char === '\r' && this._input.peek(1) === '\n') {
        // Escaped CRLF: skip the '\r' so only the '\n' is kept below.
        this._input.next();
      }
      next += this._input.next();
    } else if (start_sub) {
      if (start_sub === '${' && next === '$' && this._input.peek() === '{') {
        next += this._input.next();
      }

      if (start_sub === next) {
        // Alternate between template text and `${ ... }` expression contexts.
        if (delimiter === '`') {
          next += this._read_string_recursive('}', allow_unescaped_newlines, '`');
        } else {
          next += this._read_string_recursive('`', allow_unescaped_newlines, '${');
        }
        if (this._input.hasNext()) {
          next += this._input.next();
        }
      }
    }
    next += pattern.read();
    resulting_string += next;
  }

  return resulting_string;
};

// The arrays are exported as copies so callers cannot mutate the module's own lists.
module.exports.Tokenizer = Tokenizer;
module.exports.TOKEN = TOKEN;
module.exports.positionable_operators = positionable_operators.slice();
module.exports.line_starters = line_starters.slice();