diff options
Diffstat (limited to 'alarm/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js')
-rw-r--r-- | alarm/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js | 295 |
1 files changed, 0 insertions, 295 deletions
diff --git a/alarm/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js b/alarm/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js deleted file mode 100644 index 25b7537..0000000 --- a/alarm/node_modules/html-encoding-sniffer/lib/html-encoding-sniffer.js +++ /dev/null @@ -1,295 +0,0 @@ -"use strict"; -const whatwgEncoding = require("whatwg-encoding"); - -// https://html.spec.whatwg.org/#encoding-sniffing-algorithm -module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => { - let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910 - - if (encoding === null && transportLayerEncodingLabel !== undefined) { - encoding = whatwgEncoding.labelToName(transportLayerEncodingLabel); - } - - if (encoding === null) { - encoding = prescanMetaCharset(buffer); - } - - if (encoding === null) { - encoding = defaultEncoding; - } - - return encoding; -}; - -// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding -function prescanMetaCharset(buffer) { - const l = Math.min(buffer.length, 1024); - for (let i = 0; i < l; i++) { - let c = buffer[i]; - if (c === 0x3C) { - // "<" - const c1 = buffer[i + 1]; - const c2 = buffer[i + 2]; - const c3 = buffer[i + 3]; - const c4 = buffer[i + 4]; - const c5 = buffer[i + 5]; - // !-- (comment start) - if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) { - i += 4; - for (; i < l; i++) { - c = buffer[i]; - const cMinus1 = buffer[i - 1]; - const cMinus2 = buffer[i - 2]; - // --> (comment end) - if (c === 0x3E && cMinus1 === 0x2D && cMinus2 === 0x2D) { - break; - } - } - } else if ((c1 === 0x4D || c1 === 0x6D) && - (c2 === 0x45 || c2 === 0x65) && - (c3 === 0x54 || c3 === 0x74) && - (c4 === 0x41 || c4 === 0x61) && - (isSpaceCharacter(c5) || c5 === 0x2F)) { - // "meta" + space or / - i += 6; - const attributeList = new Set(); - let gotPragma = false; - let needPragma = null; - let charset = null; - - let attrRes; - do { - attrRes = getAttribute(buffer, i, l); - if (attrRes.attr && !attributeList.has(attrRes.attr.name)) { - attributeList.add(attrRes.attr.name); - if (attrRes.attr.name === "http-equiv") { - gotPragma = attrRes.attr.value === "content-type"; - } else if (attrRes.attr.name === "content" && !charset) { - charset = extractCharacterEncodingFromMeta(attrRes.attr.value); - if (charset !== null) { - needPragma = true; - } - } else if (attrRes.attr.name === "charset") { - charset = whatwgEncoding.labelToName(attrRes.attr.value); - needPragma = false; - } - } - i = attrRes.i; - } while (attrRes.attr); - - if (needPragma === null) { - continue; - } - if (needPragma === true && gotPragma === false) { - continue; - } - if (charset === null) { - continue; - } - - if (charset === "UTF-16LE" || charset === "UTF-16BE") { - charset = "UTF-8"; - } - if (charset === "x-user-defined") { - charset = "windows-1252"; - } - - return charset; - } else if ((c1 >= 0x41 && c1 <= 0x5A) || (c1 >= 0x61 && c1 <= 0x7A)) { - // a-z or A-Z - for (i += 2; i < l; i++) { - c = buffer[i]; - // space or > - if (isSpaceCharacter(c) || c === 0x3E) { - break; - } - } - let attrRes; - do { - attrRes = getAttribute(buffer, i, l); - i = attrRes.i; - } while (attrRes.attr); - } else if (c1 === 0x21 || c1 === 0x2F || c1 === 0x3F) { - // ! or / or ? - for (i += 2; i < l; i++) { - c = buffer[i]; - // > - if (c === 0x3E) { - break; - } - } - } - } - } - return null; -} - -// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing -function getAttribute(buffer, i, l) { - for (; i < l; i++) { - let c = buffer[i]; - // space or / - if (isSpaceCharacter(c) || c === 0x2F) { - continue; - } - // ">" - if (c === 0x3E) { - break; - } - let name = ""; - let value = ""; - nameLoop:for (; i < l; i++) { - c = buffer[i]; - // "=" - if (c === 0x3D && name !== "") { - i++; - break; - } - // space - if (isSpaceCharacter(c)) { - for (i++; i < l; i++) { - c = buffer[i]; - // space - if (isSpaceCharacter(c)) { - continue; - } - // not "=" - if (c !== 0x3D) { - return { attr: { name, value }, i }; - } - - i++; - break nameLoop; - } - break; - } - // / or > - if (c === 0x2F || c === 0x3E) { - return { attr: { name, value }, i }; - } - // A-Z - if (c >= 0x41 && c <= 0x5A) { - name += String.fromCharCode(c + 0x20); // lowercase - } else { - name += String.fromCharCode(c); - } - } - c = buffer[i]; - // space - if (isSpaceCharacter(c)) { - for (i++; i < l; i++) { - c = buffer[i]; - // space - if (isSpaceCharacter(c)) { - continue; - } else { - break; - } - } - } - // " or ' - if (c === 0x22 || c === 0x27) { - const quote = c; - for (i++; i < l; i++) { - c = buffer[i]; - - if (c === quote) { - i++; - return { attr: { name, value }, i }; - } - - // A-Z - if (c >= 0x41 && c <= 0x5A) { - value += String.fromCharCode(c + 0x20); // lowercase - } else { - value += String.fromCharCode(c); - } - } - } - - // > - if (c === 0x3E) { - return { attr: { name, value }, i }; - } - - // A-Z - if (c >= 0x41 && c <= 0x5A) { - value += String.fromCharCode(c + 0x20); // lowercase - } else { - value += String.fromCharCode(c); - } - - for (i++; i < l; i++) { - c = buffer[i]; - - // space or > - if (isSpaceCharacter(c) || c === 0x3E) { - return { attr: { name, value }, i }; - } - - // A-Z - if (c >= 0x41 && c <= 0x5A) { - value += String.fromCharCode(c + 0x20); // lowercase - } else { - value += String.fromCharCode(c); - } - } - } - return { i }; -} - -function extractCharacterEncodingFromMeta(string) { - let position = 0; - - while (true) { - const indexOfCharset = string.substring(position).search(/charset/i); - - if (indexOfCharset === -1) { - return null; - } - let subPosition = position + indexOfCharset + "charset".length; - - while (isSpaceCharacter(string[subPosition].charCodeAt(0))) { - ++subPosition; - } - - if (string[subPosition] !== "=") { - position = subPosition - 1; - continue; - } - - ++subPosition; - - while (isSpaceCharacter(string[subPosition].charCodeAt(0))) { - ++subPosition; - } - - position = subPosition; - break; - } - - if (string[position] === "\"" || string[position] === "'") { - const nextIndex = string.indexOf(string[position], position + 1); - - if (nextIndex !== -1) { - return whatwgEncoding.labelToName(string.substring(position + 1, nextIndex)); - } - - // It is an unmatched quotation mark - return null; - } - - if (string.length === position + 1) { - return null; - } - - const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/); - const end = indexOfASCIIWhitespaceOrSemicolon === -1 ? - string.length : - position + indexOfASCIIWhitespaceOrSemicolon + 1; - - return whatwgEncoding.labelToName(string.substring(position, end)); -} - -function isSpaceCharacter(c) { - return c === 0x09 || c === 0x0A || c === 0x0C || c === 0x0D || c === 0x20; -} |