diff options
Diffstat (limited to 'node_modules/htmlparser2')
24 files changed, 2543 insertions, 0 deletions
diff --git a/node_modules/htmlparser2/LICENSE b/node_modules/htmlparser2/LICENSE new file mode 100644 index 0000000..0a35e02 --- /dev/null +++ b/node_modules/htmlparser2/LICENSE @@ -0,0 +1,18 @@ +Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE.
\ No newline at end of file diff --git a/node_modules/htmlparser2/README.md b/node_modules/htmlparser2/README.md new file mode 100644 index 0000000..5ec9095 --- /dev/null +++ b/node_modules/htmlparser2/README.md @@ -0,0 +1,164 @@ +# htmlparser2 + +[![NPM version](http://img.shields.io/npm/v/htmlparser2.svg?style=flat)](https://npmjs.org/package/htmlparser2) +[![Downloads](https://img.shields.io/npm/dm/htmlparser2.svg?style=flat)](https://npmjs.org/package/htmlparser2) +[![Build Status](https://img.shields.io/github/workflow/status/fb55/htmlparser2/Node.js%20Test?label=tests&style=flat)](https://github.com/fb55/htmlparser2/actions?query=workflow%3A%22Node.js+Test%22) +[![Coverage](http://img.shields.io/coveralls/fb55/htmlparser2.svg?style=flat)](https://coveralls.io/r/fb55/htmlparser2) + +The fast & forgiving HTML/XML parser. + +## Installation + + npm install htmlparser2 + +A live demo of `htmlparser2` is available [here](https://astexplorer.net/#/2AmVrGuGVJ). + +## Ecosystem + +| Name | Description | +| ------------------------------------------------------------- | ------------------------------------------------------- | +| [htmlparser2](https://github.com/fb55/htmlparser2) | Fast & forgiving HTML/XML parser | +| [domhandler](https://github.com/fb55/domhandler) | Handler for htmlparser2 that turns documents into a DOM | +| [domutils](https://github.com/fb55/domutils) | Utilities for working with domhandler's DOM | +| [css-select](https://github.com/fb55/css-select) | CSS selector engine, compatible with domhandler's DOM | +| [cheerio](https://github.com/cheeriojs/cheerio) | The jQuery API for domhandler's DOM | +| [dom-serializer](https://github.com/cheeriojs/dom-serializer) | Serializer for domhandler's DOM | + +## Usage + +`htmlparser2` itself provides a callback interface that allows consumption of documents with minimal allocations. +For a more ergonomic experience, read [Getting a DOM](#getting-a-dom) below. 
+ +```javascript +const htmlparser2 = require("htmlparser2"); +const parser = new htmlparser2.Parser({ + onopentag(name, attributes) { + /* + * This fires when a new tag is opened. + * + * If you don't need an aggregated `attributes` object, + * have a look at the `onopentagname` and `onattribute` events. + */ + if (name === "script" && attributes.type === "text/javascript") { + console.log("JS! Hooray!"); + } + }, + ontext(text) { + /* + * Fires whenever a section of text was processed. + * + * Note that this can fire at any point within text and you might + * have to stitch together multiple pieces. + */ + console.log("-->", text); + }, + onclosetag(tagname) { + /* + * Fires when a tag is closed. + * + * You can rely on this event only firing when you have received an + * equivalent opening tag before. Closing tags without corresponding + * opening tags will be ignored. + */ + if (tagname === "script") { + console.log("That's it?!"); + } + }, +}); +parser.write( + "Xyz <script type='text/javascript'>const foo = '<<bar>>';</ script>" +); +parser.end(); +``` + +Output (with multiple text events combined): + +``` +--> Xyz +JS! Hooray! +--> const foo = '<<bar>>'; +That's it?! +``` + +This example only shows three of the possible events. +Read more about the parser, its events and options in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options). + +### Usage with streams + +While the `Parser` interface closely resembles Node.js streams, it's not a 100% match. 
+Use the `WritableStream` interface to process a streaming input: + +```javascript +const { WritableStream } = require("htmlparser2/lib/WritableStream"); +const parserStream = new WritableStream({ + ontext(text) { + console.log("Streaming:", text); + }, +}); + +const htmlStream = fs.createReadStream("./my-file.html"); +htmlStream.pipe(parserStream).on("finish", () => console.log("done")); +``` + +## Getting a DOM + +The `DomHandler` produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper. + +```js +const htmlparser2 = require("htmlparser2"); + +const dom = htmlparser2.parseDocument(htmlString); +``` + +The `DomHandler`, while still bundled with this module, was moved to its [own module](https://github.com/fb55/domhandler). +Have a look at that for further information. + +## Parsing RSS/RDF/Atom Feeds + +```javascript +const feed = htmlparser2.parseFeed(content, options); +``` + +Note: While the provided feed handler works for most feeds, +you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained. + +## Performance + +After having some artificial benchmarks for some time, **@AndreasMadsen** published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites. 
+ +At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests): + +``` +gumbo-parser : 34.9208 ms/file ± 21.4238 +html-parser : 24.8224 ms/file ± 15.8703 +html5 : 419.597 ms/file ± 264.265 +htmlparser : 60.0722 ms/file ± 384.844 +htmlparser2-dom: 12.0749 ms/file ± 6.49474 +htmlparser2 : 7.49130 ms/file ± 5.74368 +hubbub : 30.4980 ms/file ± 16.4682 +libxmljs : 14.1338 ms/file ± 18.6541 +parse5 : 22.0439 ms/file ± 15.3743 +sax : 49.6513 ms/file ± 26.6032 +``` + +## How does this module differ from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)? + +This module started as a fork of the `htmlparser` module. +The main difference is that `htmlparser2` is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). +`htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore. + +The parser now provides a callback interface inspired by [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). +As a result, old handlers won't work anymore. + +The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, your code should work as expected. + +## Security contact information + +To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure. 
+ +## `htmlparser2` for enterprise + +Available as part of the Tidelift Subscription + +The maintainers of `htmlparser2` and thousands of other packages are working with Tidelift to deliver commercial support and maintenance for the open source dependencies you use to build your applications. Save time, reduce risk, and improve code health, while paying the maintainers of the exact dependencies you use. [Learn more.](https://tidelift.com/subscription/pkg/npm-htmlparser2?utm_source=npm-htmlparser2&utm_medium=referral&utm_campaign=enterprise&utm_term=repo) diff --git a/node_modules/htmlparser2/lib/CollectingHandler.d.ts b/node_modules/htmlparser2/lib/CollectingHandler.d.ts new file mode 100644 index 0000000..603a06e --- /dev/null +++ b/node_modules/htmlparser2/lib/CollectingHandler.d.ts @@ -0,0 +1,10 @@ +import MultiplexHandler from "./MultiplexHandler"; +import { Handler } from "./Parser"; +export declare class CollectingHandler extends MultiplexHandler { + private readonly cbs; + events: [keyof Handler, ...unknown[]][]; + constructor(cbs?: Partial<Handler>); + onreset(): void; + restart(): void; +} +//# sourceMappingURL=CollectingHandler.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/CollectingHandler.d.ts.map b/node_modules/htmlparser2/lib/CollectingHandler.d.ts.map new file mode 100644 index 0000000..d4356d1 --- /dev/null +++ b/node_modules/htmlparser2/lib/CollectingHandler.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"CollectingHandler.d.ts","sourceRoot":"","sources":["../src/CollectingHandler.ts"],"names":[],"mappings":"AAAA,OAAO,gBAAgB,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAInC,qBAAa,iBAAkB,SAAQ,gBAAgB;IAGvC,OAAO,CAAC,QAAQ,CAAC,GAAG;IAFzB,MAAM,EAAE,CAAC,MAAM,OAAO,EAAE,GAAG,OAAO,EAAE,CAAC,EAAE,CAAM;gBAEvB,GAAG,GAAE,OAAO,CAAC,OAAO,CAAM;IAOvD,OAAO,IAAI,IAAI;IAKf,OAAO,IAAI,IAAI;CAOlB"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/CollectingHandler.js b/node_modules/htmlparser2/lib/CollectingHandler.js new file mode 100644 index 0000000..03bdf4f --- /dev/null +++ b/node_modules/htmlparser2/lib/CollectingHandler.js @@ -0,0 +1,60 @@ +"use strict"; +var __extends = (this && this.__extends) || (function () { + var extendStatics = function (d, b) { + extendStatics = Object.setPrototypeOf || + ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || + function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; + return extendStatics(d, b); + }; + return function (d, b) { + if (typeof b !== "function" && b !== null) + throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); + extendStatics(d, b); + function __() { this.constructor = d; } + d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); + }; +})(); +var __spreadArray = (this && this.__spreadArray) || function (to, from) { + for (var i = 0, il = from.length, j = to.length; i < il; i++, j++) + to[j] = from[i]; + return to; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.CollectingHandler = void 0; +var MultiplexHandler_1 = __importDefault(require("./MultiplexHandler")); +var CollectingHandler = /** @class */ (function (_super) { + __extends(CollectingHandler, _super); + function CollectingHandler(cbs) { + if (cbs === void 0) { cbs = {}; } + var _this = _super.call(this, function (name) { + var _a; + var args = []; + for (var _i = 1; _i < arguments.length; _i++) { + args[_i - 1] = arguments[_i]; + } + _this.events.push(__spreadArray([name], args)); + (_a = _this.cbs[name]) === null || _a === void 0 ? 
void 0 : _a.apply(void 0, args); + }) || this; + _this.cbs = cbs; + _this.events = []; + return _this; + } + CollectingHandler.prototype.onreset = function () { + var _a, _b; + this.events = []; + (_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a); + }; + CollectingHandler.prototype.restart = function () { + var _a, _b, _c; + (_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a); + for (var _i = 0, _d = this.events; _i < _d.length; _i++) { + var _e = _d[_i], name_1 = _e[0], args = _e.slice(1); + (_c = this.cbs[name_1]) === null || _c === void 0 ? void 0 : _c.apply(void 0, args); + } + }; + return CollectingHandler; +}(MultiplexHandler_1.default)); +exports.CollectingHandler = CollectingHandler; diff --git a/node_modules/htmlparser2/lib/FeedHandler.d.ts b/node_modules/htmlparser2/lib/FeedHandler.d.ts new file mode 100644 index 0000000..61a314f --- /dev/null +++ b/node_modules/htmlparser2/lib/FeedHandler.d.ts @@ -0,0 +1,67 @@ +import DomHandler, { DomHandlerOptions } from "domhandler"; +import { ParserOptions } from "./Parser"; +declare enum FeedItemMediaMedium { + image = 0, + audio = 1, + video = 2, + document = 3, + executable = 4 +} +declare enum FeedItemMediaExpression { + sample = 0, + full = 1, + nonstop = 2 +} +interface FeedItemMedia { + url?: string; + fileSize?: number; + type?: string; + medium: FeedItemMediaMedium | undefined; + isDefault: boolean; + expression?: FeedItemMediaExpression; + bitrate?: number; + framerate?: number; + samplingrate?: number; + channels?: number; + duration?: number; + height?: number; + width?: number; + lang?: string; +} +interface FeedItem { + id?: string; + title?: string; + link?: string; + description?: string; + pubDate?: Date; + media?: FeedItemMedia[]; +} +interface Feed { + type?: string; + id?: string; + title?: string; + link?: string; + description?: string; + updated?: Date; + author?: string; + items?: FeedItem[]; +} +export declare class FeedHandler extends 
DomHandler { + feed?: Feed; + /** + * + * @param callback + * @param options + */ + constructor(callback?: ((error: Error | null) => void) | DomHandlerOptions, options?: DomHandlerOptions); + onend(): void; +} +/** + * Parse a feed. + * + * @param feed The feed that should be parsed, as a string. + * @param options Optionally, options for parsing. When using this option, you should set `xmlMode` to `true`. + */ +export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | undefined; +export {}; +//# sourceMappingURL=FeedHandler.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/FeedHandler.d.ts.map b/node_modules/htmlparser2/lib/FeedHandler.d.ts.map new file mode 100644 index 0000000..90b9b3a --- /dev/null +++ b/node_modules/htmlparser2/lib/FeedHandler.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"FeedHandler.d.ts","sourceRoot":"","sources":["../src/FeedHandler.ts"],"names":[],"mappings":"AAAA,OAAO,UAAU,EAAE,EAAE,iBAAiB,EAAiB,MAAM,YAAY,CAAC;AAE1E,OAAO,EAAU,aAAa,EAAE,MAAM,UAAU,CAAC;AAEjD,aAAK,mBAAmB;IACpB,KAAK,IAAA;IACL,KAAK,IAAA;IACL,KAAK,IAAA;IACL,QAAQ,IAAA;IACR,UAAU,IAAA;CACb;AAED,aAAK,uBAAuB;IACxB,MAAM,IAAA;IACN,IAAI,IAAA;IACJ,OAAO,IAAA;CACV;AAED,UAAU,aAAa;IACnB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,mBAAmB,GAAG,SAAS,CAAC;IACxC,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,uBAAuB,CAAC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,QAAQ;IACd,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,IAAI,CAAC;IACf,KAAK,CAAC,EAAE,aAAa,EAAE,CAAC;CAC3B;AAED,UAAU,IAAI;IACV,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,IAAI,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,QAAQ,EAAE,CAAC;CACtB;AAGD,qBAAa,WAAY,SAAQ,UAAU;IACvC,IAAI,CAAC,EAAE,IAAI,CAAC;IAEZ;;;;OAIG;gBAEC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,KAAK,IAAI,CAAC,GAAG,iBAAiB,EAC9D,OAAO,CAAC,EAAE,iBAAiB;IAS/B,KAAK,IAAI,IAAI;CAkGhB;AA4FD;;;;;GAKG;AACH,wBAAgB,SAAS,CACrB,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,aAAa,GAAG,iBAAqC,GAC/D,IAAI,GAAG,SAAS,CAIlB"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/FeedHandler.js b/node_modules/htmlparser2/lib/FeedHandler.js new file mode 100644 index 0000000..e8e2642 --- /dev/null +++ b/node_modules/htmlparser2/lib/FeedHandler.js @@ -0,0 +1,235 @@ +"use strict"; +var __extends = (this && this.__extends) || (function () { + var extendStatics = function (d, b) { + extendStatics = Object.setPrototypeOf || + ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || + function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; + return extendStatics(d, b); + }; + return function (d, b) { + if (typeof b !== "function" && b !== null) + throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); + extendStatics(d, b); + function __() { this.constructor = d; } + d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); + }; +})(); +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.parseFeed = exports.FeedHandler = void 0; +var domhandler_1 = __importDefault(require("domhandler")); +var DomUtils = __importStar(require("domutils")); +var Parser_1 = require("./Parser"); +var FeedItemMediaMedium; +(function (FeedItemMediaMedium) { + FeedItemMediaMedium[FeedItemMediaMedium["image"] = 0] = "image"; + FeedItemMediaMedium[FeedItemMediaMedium["audio"] = 1] = "audio"; + FeedItemMediaMedium[FeedItemMediaMedium["video"] = 2] = "video"; + FeedItemMediaMedium[FeedItemMediaMedium["document"] = 3] = "document"; + FeedItemMediaMedium[FeedItemMediaMedium["executable"] = 4] = "executable"; +})(FeedItemMediaMedium || (FeedItemMediaMedium = {})); +var FeedItemMediaExpression; +(function (FeedItemMediaExpression) { + FeedItemMediaExpression[FeedItemMediaExpression["sample"] = 0] = "sample"; + FeedItemMediaExpression[FeedItemMediaExpression["full"] = 1] = "full"; + FeedItemMediaExpression[FeedItemMediaExpression["nonstop"] = 2] = "nonstop"; +})(FeedItemMediaExpression || (FeedItemMediaExpression = {})); +// TODO: Consume data as it is coming in +var FeedHandler = /** @class */ (function (_super) { + __extends(FeedHandler, _super); + /** + * Accepts `(callback, options)` or `(options)`; a lone options object is moved into `options` before `callback` is cleared. + * @param callback Called with an `Error`, or `null` on success, once the feed has been processed. May instead be the options object. + * @param options Options forwarded to the `DomHandler` super constructor. + */ + function FeedHandler(callback, options) { + var _this = this; + if (typeof callback === "object") { + options = callback; + callback = undefined; + } + _this = _super.call(this, callback, options) || this; + return _this; + } + FeedHandler.prototype.onend = function () { + var _a, _b; + var feedRoot = getOneElement(isValidFeed, this.dom); + if (!feedRoot) { + this.handleCallback(new Error("couldn't find root of feed")); + return; + } + var feed = {}; + if (feedRoot.name === "feed") { + var childs = feedRoot.children; + feed.type = "atom"; + addConditionally(feed, "id", "id", childs); + addConditionally(feed, "title", "title", childs); + var href = getAttribute("href", 
getOneElement("link", childs)); + if (href) { + feed.link = href; + } + addConditionally(feed, "description", "subtitle", childs); + var updated = fetch("updated", childs); + if (updated) { + feed.updated = new Date(updated); + } + addConditionally(feed, "author", "email", childs, true); + feed.items = getElements("entry", childs).map(function (item) { + var entry = {}; + var children = item.children; + addConditionally(entry, "id", "id", children); + addConditionally(entry, "title", "title", children); + var href = getAttribute("href", getOneElement("link", children)); + if (href) { + entry.link = href; + } + var description = fetch("summary", children) || fetch("content", children); + if (description) { + entry.description = description; + } + var pubDate = fetch("updated", children); + if (pubDate) { + entry.pubDate = new Date(pubDate); + } + entry.media = getMediaElements(children); + return entry; + }); + } + else { + var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? 
_b : []; + feed.type = feedRoot.name.substr(0, 3); + feed.id = ""; + addConditionally(feed, "title", "title", childs); + addConditionally(feed, "link", "link", childs); + addConditionally(feed, "description", "description", childs); + var updated = fetch("lastBuildDate", childs); + if (updated) { + feed.updated = new Date(updated); + } + addConditionally(feed, "author", "managingEditor", childs, true); + feed.items = getElements("item", feedRoot.children).map(function (item) { + var entry = {}; + var children = item.children; + addConditionally(entry, "id", "guid", children); + addConditionally(entry, "title", "title", children); + addConditionally(entry, "link", "link", children); + addConditionally(entry, "description", "description", children); + var pubDate = fetch("pubDate", children); + if (pubDate) + entry.pubDate = new Date(pubDate); + entry.media = getMediaElements(children); + return entry; + }); + } + this.feed = feed; + this.handleCallback(null); + }; + return FeedHandler; +}(domhandler_1.default)); +exports.FeedHandler = FeedHandler; +function getMediaElements(where) { + return getElements("media:content", where).map(function (elem) { + var media = { + medium: elem.attribs.medium, + isDefault: !!elem.attribs.isDefault, + }; + if (elem.attribs.url) { + media.url = elem.attribs.url; + } + if (elem.attribs.fileSize) { + media.fileSize = parseInt(elem.attribs.fileSize, 10); + } + if (elem.attribs.type) { + media.type = elem.attribs.type; + } + if (elem.attribs.expression) { + media.expression = elem.attribs + .expression; + } + if (elem.attribs.bitrate) { + media.bitrate = parseInt(elem.attribs.bitrate, 10); + } + if (elem.attribs.framerate) { + media.framerate = parseInt(elem.attribs.framerate, 10); + } + if (elem.attribs.samplingrate) { + media.samplingrate = parseInt(elem.attribs.samplingrate, 10); + } + if (elem.attribs.channels) { + media.channels = parseInt(elem.attribs.channels, 10); + } + if (elem.attribs.duration) { + media.duration = 
parseInt(elem.attribs.duration, 10); + } + if (elem.attribs.height) { + media.height = parseInt(elem.attribs.height, 10); + } + if (elem.attribs.width) { + media.width = parseInt(elem.attribs.width, 10); + } + if (elem.attribs.lang) { + media.lang = elem.attribs.lang; + } + return media; + }); +} +function getElements(tagName, where) { + return DomUtils.getElementsByTagName(tagName, where, true); +} +function getOneElement(tagName, node) { + return DomUtils.getElementsByTagName(tagName, node, true, 1)[0]; +} +function fetch(tagName, where, recurse) { + if (recurse === void 0) { recurse = false; } + return DomUtils.getText(DomUtils.getElementsByTagName(tagName, where, recurse, 1)).trim(); +} +function getAttribute(name, elem) { + if (!elem) { + return null; + } + var attribs = elem.attribs; + return attribs[name]; +} +function addConditionally(obj, prop, what, where, recurse) { + if (recurse === void 0) { recurse = false; } + var tmp = fetch(what, where, recurse); + if (tmp) + obj[prop] = tmp; +} +function isValidFeed(value) { + return value === "rss" || value === "feed" || value === "rdf:RDF"; +} +/** + * Parse a feed. + * + * @param feed The feed that should be parsed, as a string. + * @param options Optionally, options for parsing. When using this option, you should set `xmlMode` to `true`. + */ +function parseFeed(feed, options) { + if (options === void 0) { options = { xmlMode: true }; } + var handler = new FeedHandler(options); + new Parser_1.Parser(handler, options).end(feed); + return handler.feed; +} +exports.parseFeed = parseFeed; diff --git a/node_modules/htmlparser2/lib/MultiplexHandler.d.ts b/node_modules/htmlparser2/lib/MultiplexHandler.d.ts new file mode 100644 index 0000000..67cb4d1 --- /dev/null +++ b/node_modules/htmlparser2/lib/MultiplexHandler.d.ts @@ -0,0 +1,28 @@ +import type { Parser, Handler } from "./Parser"; +/** + * Calls a specific handler function for all events that are encountered. 
+ */ +export default class MultiplexHandler implements Handler { + private readonly func; + /** + * @param func The function to multiplex all events to. + */ + constructor(func: (event: keyof Handler, ...args: unknown[]) => void); + onattribute(name: string, value: string, quote: string | null | undefined): void; + oncdatastart(): void; + oncdataend(): void; + ontext(text: string): void; + onprocessinginstruction(name: string, value: string): void; + oncomment(comment: string): void; + oncommentend(): void; + onclosetag(name: string): void; + onopentag(name: string, attribs: { + [key: string]: string; + }): void; + onopentagname(name: string): void; + onerror(error: Error): void; + onend(): void; + onparserinit(parser: Parser): void; + onreset(): void; +} +//# sourceMappingURL=MultiplexHandler.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/MultiplexHandler.d.ts.map b/node_modules/htmlparser2/lib/MultiplexHandler.d.ts.map new file mode 100644 index 0000000..031ab5f --- /dev/null +++ b/node_modules/htmlparser2/lib/MultiplexHandler.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"MultiplexHandler.d.ts","sourceRoot":"","sources":["../src/MultiplexHandler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAEhD;;GAEG;AACH,MAAM,CAAC,OAAO,OAAO,gBAAiB,YAAW,OAAO;IAKhD,OAAO,CAAC,QAAQ,CAAC,IAAI;IAJzB;;OAEG;gBAEkB,IAAI,EAAE,CACnB,KAAK,EAAE,MAAM,OAAO,EACpB,GAAG,IAAI,EAAE,OAAO,EAAE,KACjB,IAAI;IAGb,WAAW,CACP,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GACjC,IAAI;IAGP,YAAY,IAAI,IAAI;IAGpB,UAAU,IAAI,IAAI;IAGlB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAG1B,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAG1D,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAGhC,YAAY,IAAI,IAAI;IAGpB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAG9B,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,GAAG,IAAI;IAGjE,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAGjC,OAAO,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI;IAG3B,KAAK,IAAI,IAAI;IAGb,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAGlC,OAAO,IAAI,IAAI;CAGlB"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/MultiplexHandler.js b/node_modules/htmlparser2/lib/MultiplexHandler.js new file mode 100644 index 0000000..fcb1aab --- /dev/null +++ b/node_modules/htmlparser2/lib/MultiplexHandler.js @@ -0,0 +1,57 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +/** + * Calls a specific handler function for all events that are encountered. + */ +var MultiplexHandler = /** @class */ (function () { + /** + * @param func The function to multiplex all events to. + */ + function MultiplexHandler(func) { + this.func = func; + } + MultiplexHandler.prototype.onattribute = function (name, value, quote) { + this.func("onattribute", name, value, quote); + }; + MultiplexHandler.prototype.oncdatastart = function () { + this.func("oncdatastart"); + }; + MultiplexHandler.prototype.oncdataend = function () { + this.func("oncdataend"); + }; + MultiplexHandler.prototype.ontext = function (text) { + this.func("ontext", text); + }; + MultiplexHandler.prototype.onprocessinginstruction = function (name, value) { + this.func("onprocessinginstruction", name, value); + }; + MultiplexHandler.prototype.oncomment = function (comment) { + this.func("oncomment", comment); + }; + MultiplexHandler.prototype.oncommentend = function () { + this.func("oncommentend"); + }; + MultiplexHandler.prototype.onclosetag = function (name) { + this.func("onclosetag", name); + }; + MultiplexHandler.prototype.onopentag = function (name, attribs) { + this.func("onopentag", name, attribs); + }; + MultiplexHandler.prototype.onopentagname = function (name) { + this.func("onopentagname", name); + }; + MultiplexHandler.prototype.onerror = function (error) { + this.func("onerror", error); + }; + MultiplexHandler.prototype.onend = function () { + this.func("onend"); + }; + MultiplexHandler.prototype.onparserinit = function (parser) { + this.func("onparserinit", parser); + }; + MultiplexHandler.prototype.onreset = function () { + 
this.func("onreset"); + }; + return MultiplexHandler; +}()); +exports.default = MultiplexHandler; diff --git a/node_modules/htmlparser2/lib/Parser.d.ts b/node_modules/htmlparser2/lib/Parser.d.ts new file mode 100644 index 0000000..e5a7c49 --- /dev/null +++ b/node_modules/htmlparser2/lib/Parser.d.ts @@ -0,0 +1,159 @@ +import Tokenizer from "./Tokenizer"; +export interface ParserOptions { + /** + * Indicates whether special tags (`<script>`, `<style>`, and `<title>`) should get special treatment + * and if "empty" tags (eg. `<br>`) can have children. If `false`, the content of special tags + * will be text only. For feeds and other XML content (documents that don't consist of HTML), + * set this to `true`. + * + * @default false + */ + xmlMode?: boolean; + /** + * Decode entities within the document. + * + * @default true + */ + decodeEntities?: boolean; + /** + * If set to true, all tags will be lowercased. + * + * @default !xmlMode + */ + lowerCaseTags?: boolean; + /** + * If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed. + * + * @default !xmlMode + */ + lowerCaseAttributeNames?: boolean; + /** + * If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled. + * NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text. + * + * @default xmlMode + */ + recognizeCDATA?: boolean; + /** + * If set to `true`, self-closing tags will trigger the onclosetag event even if xmlMode is not set to `true`. + * NOTE: If xmlMode is set to `true` then self-closing tags will always be recognized. + * + * @default xmlMode + */ + recognizeSelfClosing?: boolean; + /** + * Allows the default tokenizer to be overwritten. 
+ */ + Tokenizer?: typeof Tokenizer; +} +export interface Handler { + onparserinit(parser: Parser): void; + /** + * Resets the handler back to starting state + */ + onreset(): void; + /** + * Signals the handler that parsing is done + */ + onend(): void; + onerror(error: Error): void; + onclosetag(name: string): void; + onopentagname(name: string): void; + /** + * + * @param name Name of the attribute + * @param value Value of the attribute. + * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value. + */ + onattribute(name: string, value: string, quote?: string | undefined | null): void; + onopentag(name: string, attribs: { + [s: string]: string; + }): void; + ontext(data: string): void; + oncomment(data: string): void; + oncdatastart(): void; + oncdataend(): void; + oncommentend(): void; + onprocessinginstruction(name: string, data: string): void; +} +export declare class Parser { + /** The start index of the last event. */ + startIndex: number; + /** The end index of the last event. 
*/ + endIndex: number | null; + private tagname; + private attribname; + private attribvalue; + private attribs; + private stack; + private readonly foreignContext; + private readonly cbs; + private readonly options; + private readonly lowerCaseTagNames; + private readonly lowerCaseAttributeNames; + private readonly tokenizer; + constructor(cbs: Partial<Handler> | null, options?: ParserOptions); + private updatePosition; + ontext(data: string): void; + onopentagname(name: string): void; + onopentagend(): void; + onclosetag(name: string): void; + onselfclosingtag(): void; + private closeCurrentTag; + onattribname(name: string): void; + onattribdata(value: string): void; + onattribend(quote: string | undefined | null): void; + private getInstructionName; + ondeclaration(value: string): void; + onprocessinginstruction(value: string): void; + oncomment(value: string): void; + oncdata(value: string): void; + onerror(err: Error): void; + onend(): void; + /** + * Resets the parser to a blank state, ready to parse a new HTML document + */ + reset(): void; + /** + * Resets the parser, then parses a complete document and + * pushes it to the handler. + * + * @param data Document to parse. + */ + parseComplete(data: string): void; + /** + * Parses a chunk of data and calls the corresponding callbacks. + * + * @param chunk Chunk to parse. + */ + write(chunk: string): void; + /** + * Parses the end of the buffer and clears the stack, calls onend. + * + * @param chunk Optional final chunk to parse. + */ + end(chunk?: string): void; + /** + * Pauses parsing. The parser won't emit events until `resume` is called. + */ + pause(): void; + /** + * Resumes parsing after `pause` was called. + */ + resume(): void; + /** + * Alias of `write`, for backwards compatibility. + * + * @param chunk Chunk to parse. + * @deprecated + */ + parseChunk(chunk: string): void; + /** + * Alias of `end`, for backwards compatibility. + * + * @param chunk Optional final chunk to parse. 
+ * @deprecated + */ + done(chunk?: string): void; +} +//# sourceMappingURL=Parser.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Parser.d.ts.map b/node_modules/htmlparser2/lib/Parser.d.ts.map new file mode 100644 index 0000000..2e8bce3 --- /dev/null +++ b/node_modules/htmlparser2/lib/Parser.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"Parser.d.ts","sourceRoot":"","sources":["../src/Parser.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,aAAa,CAAC;AAoGpC,MAAM,WAAW,aAAa;IAC1B;;;;;;;OAOG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB;;;;OAIG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IAExB;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAElC;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,SAAS,CAAC;CAChC;AAED,MAAM,WAAW,OAAO;IACpB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAEnC;;OAEG;IACH,OAAO,IAAI,IAAI,CAAC;IAEhB;;OAEG;IACH,KAAK,IAAI,IAAI,CAAC;IACd,OAAO,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC;;;;;OAKG;IACH,WAAW,CACP,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAClC,IAAI,CAAC;IACR,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE;QAAE,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAChE,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,YAAY,IAAI,IAAI,CAAC;IACrB,UAAU,IAAI,IAAI,CAAC;IACnB,YAAY,IAAI,IAAI,CAAC;IACrB,uBAAuB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7D;AAID,qBAAa,MAAM;IACf,yCAAyC;IAClC,UAAU,SAAK;IACtB,uCAAuC;IAChC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAQ;IAEtC,OAAO,CAAC,OAAO,CAAM;IACrB,OAAO,CAAC,UAAU,CAAM;IACxB,OAAO,CAAC,WAAW,CAAM;IACzB,OAAO,CAAC,OAAO,CAA0C;IACzD,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAmB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAgB;IACxC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAU;IAC5C,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAU;IAClD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAY;gBAE1B,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,IAAI,EAAE,OAAO,GAAE
,aAAkB;IAarE,OAAO,CAAC,cAAc;IActB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAM1B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IA+BjC,YAAY,IAAI,IAAI;IAgBpB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAkC9B,gBAAgB,IAAI,IAAI;IAYxB,OAAO,CAAC,eAAe;IAavB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAOhC,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAIjC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,IAAI;IAYnD,OAAO,CAAC,kBAAkB;IAW1B,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAOlC,uBAAuB,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAO5C,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAM9B,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAW5B,OAAO,CAAC,GAAG,EAAE,KAAK,GAAG,IAAI;IAIzB,KAAK,IAAI,IAAI;IAWb;;OAEG;IACI,KAAK,IAAI,IAAI;IAUpB;;;;;OAKG;IACI,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAKxC;;;;OAIG;IACI,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAIjC;;;;OAIG;IACI,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;IAIhC;;OAEG;IACI,KAAK,IAAI,IAAI;IAIpB;;OAEG;IACI,MAAM,IAAI,IAAI;IAIrB;;;;;OAKG;IACI,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAGtC;;;;;OAKG;IACI,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;CAGpC"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Parser.js b/node_modules/htmlparser2/lib/Parser.js new file mode 100644 index 0000000..adc323d --- /dev/null +++ b/node_modules/htmlparser2/lib/Parser.js @@ -0,0 +1,381 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.Parser = void 0; +var Tokenizer_1 = __importDefault(require("./Tokenizer")); +var formTags = new Set([ + "input", + "option", + "optgroup", + "select", + "button", + "datalist", + "textarea", +]); +var pTag = new Set(["p"]); +var openImpliesClose = { + tr: new Set(["tr", "th", "td"]), + th: new Set(["th"]), + td: new Set(["thead", "th", "td"]), + body: new Set(["head", "link", "script"]), + li: new Set(["li"]), + p: pTag, + h1: pTag, + h2: pTag, + h3: pTag, + h4: pTag, + h5: pTag, + h6: pTag, + select: formTags, + input: formTags, + output: formTags, + button: formTags, + datalist: formTags, + textarea: formTags, + option: new Set(["option"]), + optgroup: new Set(["optgroup", "option"]), + dd: new Set(["dt", "dd"]), + dt: new Set(["dt", "dd"]), + address: pTag, + article: pTag, + aside: pTag, + blockquote: pTag, + details: pTag, + div: pTag, + dl: pTag, + fieldset: pTag, + figcaption: pTag, + figure: pTag, + footer: pTag, + form: pTag, + header: pTag, + hr: pTag, + main: pTag, + nav: pTag, + ol: pTag, + pre: pTag, + section: pTag, + table: pTag, + ul: pTag, + rt: new Set(["rt", "rp"]), + rp: new Set(["rt", "rp"]), + tbody: new Set(["thead", "tbody"]), + tfoot: new Set(["thead", "tbody"]), +}; +var voidElements = new Set([ + "area", + "base", + "basefont", + "br", + "col", + "command", + "embed", + "frame", + "hr", + "img", + "input", + "isindex", + "keygen", + "link", + "meta", + "param", + "source", + "track", + "wbr", +]); +var foreignContextElements = new Set(["math", "svg"]); +var 
htmlIntegrationElements = new Set([ + "mi", + "mo", + "mn", + "ms", + "mtext", + "annotation-xml", + "foreignObject", + "desc", + "title", +]); +var reNameEnd = /\s|\//; +var Parser = /** @class */ (function () { + function Parser(cbs, options) { + if (options === void 0) { options = {}; } + var _a, _b, _c, _d, _e; + /** The start index of the last event. */ + this.startIndex = 0; + /** The end index of the last event. */ + this.endIndex = null; + this.tagname = ""; + this.attribname = ""; + this.attribvalue = ""; + this.attribs = null; + this.stack = []; + this.foreignContext = []; + this.options = options; + this.cbs = cbs !== null && cbs !== void 0 ? cbs : {}; + this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : !options.xmlMode; + this.lowerCaseAttributeNames = + (_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : !options.xmlMode; + this.tokenizer = new ((_c = options.Tokenizer) !== null && _c !== void 0 ? _c : Tokenizer_1.default)(this.options, this); + (_e = (_d = this.cbs).onparserinit) === null || _e === void 0 ? void 0 : _e.call(_d, this); + } + Parser.prototype.updatePosition = function (initialOffset) { + if (this.endIndex === null) { + if (this.tokenizer.sectionStart <= initialOffset) { + this.startIndex = 0; + } + else { + this.startIndex = this.tokenizer.sectionStart - initialOffset; + } + } + else { + this.startIndex = this.endIndex + 1; + } + this.endIndex = this.tokenizer.getAbsoluteIndex(); + }; + // Tokenizer event handlers + Parser.prototype.ontext = function (data) { + var _a, _b; + this.updatePosition(1); + this.endIndex--; + (_b = (_a = this.cbs).ontext) === null || _b === void 0 ? 
void 0 : _b.call(_a, data); + }; + Parser.prototype.onopentagname = function (name) { + var _a, _b; + if (this.lowerCaseTagNames) { + name = name.toLowerCase(); + } + this.tagname = name; + if (!this.options.xmlMode && + Object.prototype.hasOwnProperty.call(openImpliesClose, name)) { + var el = void 0; + while (this.stack.length > 0 && + openImpliesClose[name].has((el = this.stack[this.stack.length - 1]))) { + this.onclosetag(el); + } + } + if (this.options.xmlMode || !voidElements.has(name)) { + this.stack.push(name); + if (foreignContextElements.has(name)) { + this.foreignContext.push(true); + } + else if (htmlIntegrationElements.has(name)) { + this.foreignContext.push(false); + } + } + (_b = (_a = this.cbs).onopentagname) === null || _b === void 0 ? void 0 : _b.call(_a, name); + if (this.cbs.onopentag) + this.attribs = {}; + }; + Parser.prototype.onopentagend = function () { + var _a, _b; + this.updatePosition(1); + if (this.attribs) { + (_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs); + this.attribs = null; + } + if (!this.options.xmlMode && + this.cbs.onclosetag && + voidElements.has(this.tagname)) { + this.cbs.onclosetag(this.tagname); + } + this.tagname = ""; + }; + Parser.prototype.onclosetag = function (name) { + this.updatePosition(1); + if (this.lowerCaseTagNames) { + name = name.toLowerCase(); + } + if (foreignContextElements.has(name) || + htmlIntegrationElements.has(name)) { + this.foreignContext.pop(); + } + if (this.stack.length && + (this.options.xmlMode || !voidElements.has(name))) { + var pos = this.stack.lastIndexOf(name); + if (pos !== -1) { + if (this.cbs.onclosetag) { + pos = this.stack.length - pos; + while (pos--) { + // We know the stack has sufficient elements. 
+ this.cbs.onclosetag(this.stack.pop()); + } + } + else + this.stack.length = pos; + } + else if (name === "p" && !this.options.xmlMode) { + this.onopentagname(name); + this.closeCurrentTag(); + } + } + else if (!this.options.xmlMode && (name === "br" || name === "p")) { + this.onopentagname(name); + this.closeCurrentTag(); + } + }; + Parser.prototype.onselfclosingtag = function () { + if (this.options.xmlMode || + this.options.recognizeSelfClosing || + this.foreignContext[this.foreignContext.length - 1]) { + this.closeCurrentTag(); + } + else { + this.onopentagend(); + } + }; + Parser.prototype.closeCurrentTag = function () { + var _a, _b; + var name = this.tagname; + this.onopentagend(); + /* + * Self-closing tags will be on the top of the stack + * (cheaper check than in onclosetag) + */ + if (this.stack[this.stack.length - 1] === name) { + (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name); + this.stack.pop(); + } + }; + Parser.prototype.onattribname = function (name) { + if (this.lowerCaseAttributeNames) { + name = name.toLowerCase(); + } + this.attribname = name; + }; + Parser.prototype.onattribdata = function (value) { + this.attribvalue += value; + }; + Parser.prototype.onattribend = function (quote) { + var _a, _b; + (_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote); + if (this.attribs && + !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) { + this.attribs[this.attribname] = this.attribvalue; + } + this.attribname = ""; + this.attribvalue = ""; + }; + Parser.prototype.getInstructionName = function (value) { + var idx = value.search(reNameEnd); + var name = idx < 0 ? 
value : value.substr(0, idx); + if (this.lowerCaseTagNames) { + name = name.toLowerCase(); + } + return name; + }; + Parser.prototype.ondeclaration = function (value) { + if (this.cbs.onprocessinginstruction) { + var name_1 = this.getInstructionName(value); + this.cbs.onprocessinginstruction("!" + name_1, "!" + value); + } + }; + Parser.prototype.onprocessinginstruction = function (value) { + if (this.cbs.onprocessinginstruction) { + var name_2 = this.getInstructionName(value); + this.cbs.onprocessinginstruction("?" + name_2, "?" + value); + } + }; + Parser.prototype.oncomment = function (value) { + var _a, _b, _c, _d; + this.updatePosition(4); + (_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, value); + (_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c); + }; + Parser.prototype.oncdata = function (value) { + var _a, _b, _c, _d, _e, _f; + this.updatePosition(1); + if (this.options.xmlMode || this.options.recognizeCDATA) { + (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a); + (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value); + (_f = (_e = this.cbs).oncdataend) === null || _f === void 0 ? void 0 : _f.call(_e); + } + else { + this.oncomment("[CDATA[" + value + "]]"); + } + }; + Parser.prototype.onerror = function (err) { + var _a, _b; + (_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, err); + }; + Parser.prototype.onend = function () { + var _a, _b; + if (this.cbs.onclosetag) { + for (var i = this.stack.length; i > 0; this.cbs.onclosetag(this.stack[--i])) + ; + } + (_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a); + }; + /** + * Resets the parser to a blank state, ready to parse a new HTML document + */ + Parser.prototype.reset = function () { + var _a, _b, _c, _d; + (_b = (_a = this.cbs).onreset) === null || _b === void 0 ? 
void 0 : _b.call(_a); + this.tokenizer.reset(); + this.tagname = ""; + this.attribname = ""; + this.attribs = null; + this.stack = []; + (_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this); + }; + /** + * Resets the parser, then parses a complete document and + * pushes it to the handler. + * + * @param data Document to parse. + */ + Parser.prototype.parseComplete = function (data) { + this.reset(); + this.end(data); + }; + /** + * Parses a chunk of data and calls the corresponding callbacks. + * + * @param chunk Chunk to parse. + */ + Parser.prototype.write = function (chunk) { + this.tokenizer.write(chunk); + }; + /** + * Parses the end of the buffer and clears the stack, calls onend. + * + * @param chunk Optional final chunk to parse. + */ + Parser.prototype.end = function (chunk) { + this.tokenizer.end(chunk); + }; + /** + * Pauses parsing. The parser won't emit events until `resume` is called. + */ + Parser.prototype.pause = function () { + this.tokenizer.pause(); + }; + /** + * Resumes parsing after `pause` was called. + */ + Parser.prototype.resume = function () { + this.tokenizer.resume(); + }; + /** + * Alias of `write`, for backwards compatibility. + * + * @param chunk Chunk to parse. + * @deprecated + */ + Parser.prototype.parseChunk = function (chunk) { + this.write(chunk); + }; + /** + * Alias of `end`, for backwards compatibility. + * + * @param chunk Optional final chunk to parse. + * @deprecated + */ + Parser.prototype.done = function (chunk) { + this.end(chunk); + }; + return Parser; +}()); +exports.Parser = Parser; diff --git a/node_modules/htmlparser2/lib/Tokenizer.d.ts b/node_modules/htmlparser2/lib/Tokenizer.d.ts new file mode 100644 index 0000000..b5549e6 --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.d.ts @@ -0,0 +1,182 @@ +/** All the states the tokenizer can be in. 
*/ +declare const enum State { + Text = 1, + BeforeTagName = 2, + InTagName = 3, + InSelfClosingTag = 4, + BeforeClosingTagName = 5, + InClosingTagName = 6, + AfterClosingTagName = 7, + BeforeAttributeName = 8, + InAttributeName = 9, + AfterAttributeName = 10, + BeforeAttributeValue = 11, + InAttributeValueDq = 12, + InAttributeValueSq = 13, + InAttributeValueNq = 14, + BeforeDeclaration = 15, + InDeclaration = 16, + InProcessingInstruction = 17, + BeforeComment = 18, + InComment = 19, + InSpecialComment = 20, + AfterComment1 = 21, + AfterComment2 = 22, + BeforeCdata1 = 23, + BeforeCdata2 = 24, + BeforeCdata3 = 25, + BeforeCdata4 = 26, + BeforeCdata5 = 27, + BeforeCdata6 = 28, + InCdata = 29, + AfterCdata1 = 30, + AfterCdata2 = 31, + BeforeSpecialS = 32, + BeforeSpecialSEnd = 33, + BeforeScript1 = 34, + BeforeScript2 = 35, + BeforeScript3 = 36, + BeforeScript4 = 37, + BeforeScript5 = 38, + AfterScript1 = 39, + AfterScript2 = 40, + AfterScript3 = 41, + AfterScript4 = 42, + AfterScript5 = 43, + BeforeStyle1 = 44, + BeforeStyle2 = 45, + BeforeStyle3 = 46, + BeforeStyle4 = 47, + AfterStyle1 = 48, + AfterStyle2 = 49, + AfterStyle3 = 50, + AfterStyle4 = 51, + BeforeSpecialT = 52, + BeforeSpecialTEnd = 53, + BeforeTitle1 = 54, + BeforeTitle2 = 55, + BeforeTitle3 = 56, + BeforeTitle4 = 57, + AfterTitle1 = 58, + AfterTitle2 = 59, + AfterTitle3 = 60, + AfterTitle4 = 61, + BeforeEntity = 62, + BeforeNumericEntity = 63, + InNamedEntity = 64, + InNumericEntity = 65, + InHexEntity = 66 +} +export interface Callbacks { + onattribdata(value: string): void; + onattribend(quote: string | undefined | null): void; + onattribname(name: string): void; + oncdata(data: string): void; + onclosetag(name: string): void; + oncomment(data: string): void; + ondeclaration(content: string): void; + onend(): void; + onerror(error: Error, state?: State): void; + onopentagend(): void; + onopentagname(name: string): void; + onprocessinginstruction(instruction: string): void; + onselfclosingtag(): 
void; + ontext(value: string): void; +} +export default class Tokenizer { + /** The current state the tokenizer is in. */ + _state: State; + /** The read buffer. */ + private buffer; + /** The beginning of the section that is currently being read. */ + sectionStart: number; + /** The index within the buffer that we are currently looking at. */ + _index: number; + /** + * Data that has already been processed will be removed from the buffer occasionally. + * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. + */ + private bufferOffset; + /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ + private baseState; + /** For special parsing behavior inside of script and style tags. */ + private special; + /** Indicates whether the tokenizer has been paused. */ + private running; + /** Indicates whether the tokenizer has finished running / `.end` has been called. */ + private ended; + private readonly cbs; + private readonly xmlMode; + private readonly decodeEntities; + constructor(options: { + xmlMode?: boolean; + decodeEntities?: boolean; + } | null, cbs: Callbacks); + reset(): void; + write(chunk: string): void; + end(chunk?: string): void; + pause(): void; + resume(): void; + /** + * The current index within all of the written data. + */ + getAbsoluteIndex(): number; + private stateText; + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. 
+ */ + private isTagStartChar; + private stateBeforeTagName; + private stateInTagName; + private stateBeforeClosingTagName; + private stateInClosingTagName; + private stateAfterClosingTagName; + private stateBeforeAttributeName; + private stateInSelfClosingTag; + private stateInAttributeName; + private stateAfterAttributeName; + private stateBeforeAttributeValue; + private handleInAttributeValue; + private stateInAttributeValueDoubleQuotes; + private stateInAttributeValueSingleQuotes; + private stateInAttributeValueNoQuotes; + private stateBeforeDeclaration; + private stateInDeclaration; + private stateInProcessingInstruction; + private stateBeforeComment; + private stateInComment; + private stateInSpecialComment; + private stateAfterComment1; + private stateAfterComment2; + private stateBeforeCdata6; + private stateInCdata; + private stateAfterCdata1; + private stateAfterCdata2; + private stateBeforeSpecialS; + private stateBeforeSpecialSEnd; + private stateBeforeSpecialLast; + private stateAfterSpecialLast; + private parseFixedEntity; + private parseLegacyEntity; + private stateInNamedEntity; + private decodeNumericEntity; + private stateInNumericEntity; + private stateInHexEntity; + private cleanup; + /** + * Iterates through the buffer, calling the function corresponding to the current state. + * + * States that are more likely to be hit are higher up, as a performance improvement. + */ + private parse; + private finish; + private handleTrailingData; + private getSection; + private emitToken; + private emitPartial; +} +export {}; +//# sourceMappingURL=Tokenizer.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Tokenizer.d.ts.map b/node_modules/htmlparser2/lib/Tokenizer.d.ts.map new file mode 100644 index 0000000..2e48b02 --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"Tokenizer.d.ts","sourceRoot":"","sources":["../src/Tokenizer.ts"],"names":[],"mappings":"AAKA,8CAA8C;AAC9C,mBAAW,KAAK;IACZ,IAAI,IAAI;IACR,aAAa,IAAA;IACb,SAAS,IAAA;IACT,gBAAgB,IAAA;IAChB,oBAAoB,IAAA;IACpB,gBAAgB,IAAA;IAChB,mBAAmB,IAAA;IAGnB,mBAAmB,IAAA;IACnB,eAAe,IAAA;IACf,kBAAkB,KAAA;IAClB,oBAAoB,KAAA;IACpB,kBAAkB,KAAA;IAClB,kBAAkB,KAAA;IAClB,kBAAkB,KAAA;IAGlB,iBAAiB,KAAA;IACjB,aAAa,KAAA;IAGb,uBAAuB,KAAA;IAGvB,aAAa,KAAA;IACb,SAAS,KAAA;IACT,gBAAgB,KAAA;IAChB,aAAa,KAAA;IACb,aAAa,KAAA;IAGb,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,OAAO,KAAA;IACP,WAAW,KAAA;IACX,WAAW,KAAA;IAGX,cAAc,KAAA;IACd,iBAAiB,KAAA;IAEjB,aAAa,KAAA;IACb,aAAa,KAAA;IACb,aAAa,KAAA;IACb,aAAa,KAAA;IACb,aAAa,KAAA;IACb,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IAEZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,WAAW,KAAA;IACX,WAAW,KAAA;IACX,WAAW,KAAA;IACX,WAAW,KAAA;IAEX,cAAc,KAAA;IACd,iBAAiB,KAAA;IACjB,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,YAAY,KAAA;IACZ,WAAW,KAAA;IACX,WAAW,KAAA;IACX,WAAW,KAAA;IACX,WAAW,KAAA;IAEX,YAAY,KAAA;IACZ,mBAAmB,KAAA;IACnB,aAAa,KAAA;IACb,eAAe,KAAA;IACf,WAAW,KAAA;CACd;AAiBD,MAAM,WAAW,SAAS;IACtB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;IACpD,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,KAAK,IAAI,IAAI,CAAC;IACd,OAAO,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC;IAC3C,YAAY,IAAI,IAAI,CAAC;IACrB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,uBAAuB,CAAC,WAA
W,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,gBAAgB,IAAI,IAAI,CAAC;IACzB,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAuGD,MAAM,CAAC,OAAO,OAAO,SAAS;IAC1B,6CAA6C;IAC7C,MAAM,QAAc;IACpB,uBAAuB;IACvB,OAAO,CAAC,MAAM,CAAM;IACpB,iEAAiE;IAC1D,YAAY,SAAK;IACxB,oEAAoE;IACpE,MAAM,SAAK;IACX;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAK;IACzB,kIAAkI;IAClI,OAAO,CAAC,SAAS,CAAc;IAC/B,oEAAoE;IACpE,OAAO,CAAC,OAAO,CAAgB;IAC/B,uDAAuD;IACvD,OAAO,CAAC,OAAO,CAAQ;IACvB,qFAAqF;IACrF,OAAO,CAAC,KAAK,CAAS;IAEtB,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAY;IAChC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAGrC,OAAO,EAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,IAAI,EAC/D,GAAG,EAAE,SAAS;IAOX,KAAK,IAAI,IAAI;IAYb,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAM1B,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;IAOzB,KAAK,IAAI,IAAI;IAIb,MAAM,IAAI,IAAI;IAUrB;;OAEG;IACI,gBAAgB,IAAI,MAAM;IAIjC,OAAO,CAAC,SAAS;IAoBjB;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAMtB,OAAO,CAAC,kBAAkB;IA8B1B,OAAO,CAAC,cAAc;IAOtB,OAAO,CAAC,yBAAyB;IAyBjC,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,wBAAwB;IAOhC,OAAO,CAAC,wBAAwB;IAYhC,OAAO,CAAC,qBAAqB;IAW7B,OAAO,CAAC,oBAAoB;IAQ5B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,yBAAyB;IAajC,OAAO,CAAC,sBAAsB;IAY9B,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,iCAAiC;IAGzC,OAAO,CAAC,6BAA6B;IAarC,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,4BAA4B;IAOpC,OAAO,CAAC,kBAAkB;IAQ1B,OAAO,CAAC,cAAc;IAGtB,OAAO,CAAC,qBAAqB;IAS7B,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,kBAAkB;IAa1B,OAAO,CAAC,iBAAiB;IASzB,OAAO,CAAC,YAAY;IAGpB,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,gBAAgB;IAaxB,OAAO,CAAC,mBAAmB;IAU3B,OAAO,CAAC,sBAAsB;IAO9B,OAAO,CAAC,sBAAsB;IAO9B,OAAO,CAAC,qBAAqB;IAS7B,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,iBAAiB;IAezB,OAAO,CAAC,kBAAkB;IA2B1B,OAAO,CAAC,mBAAmB;IAW3B,OAAO,CAAC,oBAAoB;IAY5B,OAAO,CAAC,gBAAgB;IAiBxB,OAAO,CAAC,OAAO;IA4Bf;;;;OAIG;IACH,OAAO,CAAC,KAAK;IAgJb,OAAO,CAAC,MAAM;IAQd,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,UAAU;IAGlB,OAAO,CAAC,SAAS;IAIjB,OAAO,CAAC,WAAW;CAOtB"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/Tokenizer.js b/node_modules/htmlparser2/lib/Tokenizer.js new file mode 100644 index 0000000..c6e9226 --- /dev/null +++ b/node_modules/htmlparser2/lib/Tokenizer.js @@ -0,0 +1,909 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint")); +var entities_json_1 = __importDefault(require("entities/lib/maps/entities.json")); +var legacy_json_1 = __importDefault(require("entities/lib/maps/legacy.json")); +var xml_json_1 = __importDefault(require("entities/lib/maps/xml.json")); +function whitespace(c) { + return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r"; +} +function isASCIIAlpha(c) { + return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z"); +} +function ifElseState(upper, SUCCESS, FAILURE) { + var lower = upper.toLowerCase(); + if (upper === lower) { + return function (t, c) { + if (c === lower) { + t._state = SUCCESS; + } + else { + t._state = FAILURE; + t._index--; + } + }; + } + return function (t, c) { + if (c === lower || c === upper) { + t._state = SUCCESS; + } + else { + t._state = FAILURE; + t._index--; + } + }; +} +function consumeSpecialNameChar(upper, NEXT_STATE) { + var lower = upper.toLowerCase(); + return function (t, c) { + if (c === lower || c === upper) { + t._state = NEXT_STATE; + } + else { + t._state = 3 /* InTagName */; + t._index--; // Consume the token again + } + }; +} +var stateBeforeCdata1 = ifElseState("C", 24 /* BeforeCdata2 */, 16 /* InDeclaration */); +var stateBeforeCdata2 = ifElseState("D", 25 /* BeforeCdata3 */, 16 /* InDeclaration */); +var stateBeforeCdata3 = ifElseState("A", 26 /* BeforeCdata4 */, 16 /* InDeclaration */); +var stateBeforeCdata4 = ifElseState("T", 27 /* BeforeCdata5 */, 16 /* 
InDeclaration */); +var stateBeforeCdata5 = ifElseState("A", 28 /* BeforeCdata6 */, 16 /* InDeclaration */); +var stateBeforeScript1 = consumeSpecialNameChar("R", 35 /* BeforeScript2 */); +var stateBeforeScript2 = consumeSpecialNameChar("I", 36 /* BeforeScript3 */); +var stateBeforeScript3 = consumeSpecialNameChar("P", 37 /* BeforeScript4 */); +var stateBeforeScript4 = consumeSpecialNameChar("T", 38 /* BeforeScript5 */); +var stateAfterScript1 = ifElseState("R", 40 /* AfterScript2 */, 1 /* Text */); +var stateAfterScript2 = ifElseState("I", 41 /* AfterScript3 */, 1 /* Text */); +var stateAfterScript3 = ifElseState("P", 42 /* AfterScript4 */, 1 /* Text */); +var stateAfterScript4 = ifElseState("T", 43 /* AfterScript5 */, 1 /* Text */); +var stateBeforeStyle1 = consumeSpecialNameChar("Y", 45 /* BeforeStyle2 */); +var stateBeforeStyle2 = consumeSpecialNameChar("L", 46 /* BeforeStyle3 */); +var stateBeforeStyle3 = consumeSpecialNameChar("E", 47 /* BeforeStyle4 */); +var stateAfterStyle1 = ifElseState("Y", 49 /* AfterStyle2 */, 1 /* Text */); +var stateAfterStyle2 = ifElseState("L", 50 /* AfterStyle3 */, 1 /* Text */); +var stateAfterStyle3 = ifElseState("E", 51 /* AfterStyle4 */, 1 /* Text */); +var stateBeforeSpecialT = consumeSpecialNameChar("I", 54 /* BeforeTitle1 */); +var stateBeforeTitle1 = consumeSpecialNameChar("T", 55 /* BeforeTitle2 */); +var stateBeforeTitle2 = consumeSpecialNameChar("L", 56 /* BeforeTitle3 */); +var stateBeforeTitle3 = consumeSpecialNameChar("E", 57 /* BeforeTitle4 */); +var stateAfterSpecialTEnd = ifElseState("I", 58 /* AfterTitle1 */, 1 /* Text */); +var stateAfterTitle1 = ifElseState("T", 59 /* AfterTitle2 */, 1 /* Text */); +var stateAfterTitle2 = ifElseState("L", 60 /* AfterTitle3 */, 1 /* Text */); +var stateAfterTitle3 = ifElseState("E", 61 /* AfterTitle4 */, 1 /* Text */); +var stateBeforeEntity = ifElseState("#", 63 /* BeforeNumericEntity */, 64 /* InNamedEntity */); +var stateBeforeNumericEntity = ifElseState("X", 66 /* 
InHexEntity */, 65 /* InNumericEntity */); +var Tokenizer = /** @class */ (function () { + function Tokenizer(options, cbs) { + var _a; + /** The current state the tokenizer is in. */ + this._state = 1 /* Text */; + /** The read buffer. */ + this.buffer = ""; + /** The beginning of the section that is currently being read. */ + this.sectionStart = 0; + /** The index within the buffer that we are currently looking at. */ + this._index = 0; + /** + * Data that has already been processed will be removed from the buffer occasionally. + * `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate. + */ + this.bufferOffset = 0; + /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */ + this.baseState = 1 /* Text */; + /** For special parsing behavior inside of script and style tags. */ + this.special = 1 /* None */; + /** Indicates whether the tokenizer has been paused. */ + this.running = true; + /** Indicates whether the tokenizer has finished running / `.end` has been called. */ + this.ended = false; + this.cbs = cbs; + this.xmlMode = !!(options === null || options === void 0 ? void 0 : options.xmlMode); + this.decodeEntities = (_a = options === null || options === void 0 ? void 0 : options.decodeEntities) !== null && _a !== void 0 ? 
_a : true; + } + Tokenizer.prototype.reset = function () { + this._state = 1 /* Text */; + this.buffer = ""; + this.sectionStart = 0; + this._index = 0; + this.bufferOffset = 0; + this.baseState = 1 /* Text */; + this.special = 1 /* None */; + this.running = true; + this.ended = false; + }; + Tokenizer.prototype.write = function (chunk) { + if (this.ended) + this.cbs.onerror(Error(".write() after done!")); + this.buffer += chunk; + this.parse(); + }; + Tokenizer.prototype.end = function (chunk) { + if (this.ended) + this.cbs.onerror(Error(".end() after done!")); + if (chunk) + this.write(chunk); + this.ended = true; + if (this.running) + this.finish(); + }; + Tokenizer.prototype.pause = function () { + this.running = false; + }; + Tokenizer.prototype.resume = function () { + this.running = true; + if (this._index < this.buffer.length) { + this.parse(); + } + if (this.ended) { + this.finish(); + } + }; + /** + * The current index within all of the written data. + */ + Tokenizer.prototype.getAbsoluteIndex = function () { + return this.bufferOffset + this._index; + }; + Tokenizer.prototype.stateText = function (c) { + if (c === "<") { + if (this._index > this.sectionStart) { + this.cbs.ontext(this.getSection()); + } + this._state = 2 /* BeforeTagName */; + this.sectionStart = this._index; + } + else if (this.decodeEntities && + c === "&" && + (this.special === 1 /* None */ || this.special === 4 /* Title */)) { + if (this._index > this.sectionStart) { + this.cbs.ontext(this.getSection()); + } + this.baseState = 1 /* Text */; + this._state = 62 /* BeforeEntity */; + this.sectionStart = this._index; + } + }; + /** + * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name. + * + * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar). + * We allow anything that wouldn't end the tag. 
+ */ + Tokenizer.prototype.isTagStartChar = function (c) { + return (isASCIIAlpha(c) || + (this.xmlMode && !whitespace(c) && c !== "/" && c !== ">")); + }; + Tokenizer.prototype.stateBeforeTagName = function (c) { + if (c === "/") { + this._state = 5 /* BeforeClosingTagName */; + } + else if (c === "<") { + this.cbs.ontext(this.getSection()); + this.sectionStart = this._index; + } + else if (c === ">" || + this.special !== 1 /* None */ || + whitespace(c)) { + this._state = 1 /* Text */; + } + else if (c === "!") { + this._state = 15 /* BeforeDeclaration */; + this.sectionStart = this._index + 1; + } + else if (c === "?") { + this._state = 17 /* InProcessingInstruction */; + this.sectionStart = this._index + 1; + } + else if (!this.isTagStartChar(c)) { + this._state = 1 /* Text */; + } + else { + this._state = + !this.xmlMode && (c === "s" || c === "S") + ? 32 /* BeforeSpecialS */ + : !this.xmlMode && (c === "t" || c === "T") + ? 52 /* BeforeSpecialT */ + : 3 /* InTagName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInTagName = function (c) { + if (c === "/" || c === ">" || whitespace(c)) { + this.emitToken("onopentagname"); + this._state = 8 /* BeforeAttributeName */; + this._index--; + } + }; + Tokenizer.prototype.stateBeforeClosingTagName = function (c) { + if (whitespace(c)) { + // Ignore + } + else if (c === ">") { + this._state = 1 /* Text */; + } + else if (this.special !== 1 /* None */) { + if (this.special !== 4 /* Title */ && (c === "s" || c === "S")) { + this._state = 33 /* BeforeSpecialSEnd */; + } + else if (this.special === 4 /* Title */ && + (c === "t" || c === "T")) { + this._state = 53 /* BeforeSpecialTEnd */; + } + else { + this._state = 1 /* Text */; + this._index--; + } + } + else if (!this.isTagStartChar(c)) { + this._state = 20 /* InSpecialComment */; + this.sectionStart = this._index; + } + else { + this._state = 6 /* InClosingTagName */; + this.sectionStart = this._index; + } + }; + 
Tokenizer.prototype.stateInClosingTagName = function (c) { + if (c === ">" || whitespace(c)) { + this.emitToken("onclosetag"); + this._state = 7 /* AfterClosingTagName */; + this._index--; + } + }; + Tokenizer.prototype.stateAfterClosingTagName = function (c) { + // Skip everything until ">" + if (c === ">") { + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeAttributeName = function (c) { + if (c === ">") { + this.cbs.onopentagend(); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + else if (c === "/") { + this._state = 4 /* InSelfClosingTag */; + } + else if (!whitespace(c)) { + this._state = 9 /* InAttributeName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInSelfClosingTag = function (c) { + if (c === ">") { + this.cbs.onselfclosingtag(); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + this.special = 1 /* None */; // Reset special state, in case of self-closing special tags + } + else if (!whitespace(c)) { + this._state = 8 /* BeforeAttributeName */; + this._index--; + } + }; + Tokenizer.prototype.stateInAttributeName = function (c) { + if (c === "=" || c === "/" || c === ">" || whitespace(c)) { + this.cbs.onattribname(this.getSection()); + this.sectionStart = -1; + this._state = 10 /* AfterAttributeName */; + this._index--; + } + }; + Tokenizer.prototype.stateAfterAttributeName = function (c) { + if (c === "=") { + this._state = 11 /* BeforeAttributeValue */; + } + else if (c === "/" || c === ">") { + this.cbs.onattribend(undefined); + this._state = 8 /* BeforeAttributeName */; + this._index--; + } + else if (!whitespace(c)) { + this.cbs.onattribend(undefined); + this._state = 9 /* InAttributeName */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateBeforeAttributeValue = function (c) { + if (c === '"') { + this._state = 12 /* InAttributeValueDq */; + this.sectionStart = this._index + 1; + } + else if 
(c === "'") { + this._state = 13 /* InAttributeValueSq */; + this.sectionStart = this._index + 1; + } + else if (!whitespace(c)) { + this._state = 14 /* InAttributeValueNq */; + this.sectionStart = this._index; + this._index--; // Reconsume token + } + }; + Tokenizer.prototype.handleInAttributeValue = function (c, quote) { + if (c === quote) { + this.emitToken("onattribdata"); + this.cbs.onattribend(quote); + this._state = 8 /* BeforeAttributeName */; + } + else if (this.decodeEntities && c === "&") { + this.emitToken("onattribdata"); + this.baseState = this._state; + this._state = 62 /* BeforeEntity */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) { + this.handleInAttributeValue(c, '"'); + }; + Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) { + this.handleInAttributeValue(c, "'"); + }; + Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) { + if (whitespace(c) || c === ">") { + this.emitToken("onattribdata"); + this.cbs.onattribend(null); + this._state = 8 /* BeforeAttributeName */; + this._index--; + } + else if (this.decodeEntities && c === "&") { + this.emitToken("onattribdata"); + this.baseState = this._state; + this._state = 62 /* BeforeEntity */; + this.sectionStart = this._index; + } + }; + Tokenizer.prototype.stateBeforeDeclaration = function (c) { + this._state = + c === "[" + ? 23 /* BeforeCdata1 */ + : c === "-" + ? 
18 /* BeforeComment */ + : 16 /* InDeclaration */; + }; + Tokenizer.prototype.stateInDeclaration = function (c) { + if (c === ">") { + this.cbs.ondeclaration(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateInProcessingInstruction = function (c) { + if (c === ">") { + this.cbs.onprocessinginstruction(this.getSection()); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateBeforeComment = function (c) { + if (c === "-") { + this._state = 19 /* InComment */; + this.sectionStart = this._index + 1; + } + else { + this._state = 16 /* InDeclaration */; + } + }; + Tokenizer.prototype.stateInComment = function (c) { + if (c === "-") + this._state = 21 /* AfterComment1 */; + }; + Tokenizer.prototype.stateInSpecialComment = function (c) { + if (c === ">") { + this.cbs.oncomment(this.buffer.substring(this.sectionStart, this._index)); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + }; + Tokenizer.prototype.stateAfterComment1 = function (c) { + if (c === "-") { + this._state = 22 /* AfterComment2 */; + } + else { + this._state = 19 /* InComment */; + } + }; + Tokenizer.prototype.stateAfterComment2 = function (c) { + if (c === ">") { + // Remove 2 trailing chars + this.cbs.oncomment(this.buffer.substring(this.sectionStart, this._index - 2)); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + else if (c !== "-") { + this._state = 19 /* InComment */; + } + // Else: stay in AFTER_COMMENT_2 (`--->`) + }; + Tokenizer.prototype.stateBeforeCdata6 = function (c) { + if (c === "[") { + this._state = 29 /* InCdata */; + this.sectionStart = this._index + 1; + } + else { + this._state = 16 /* InDeclaration */; + this._index--; + } + }; + Tokenizer.prototype.stateInCdata = function (c) { + if (c === "]") + this._state = 30 /* AfterCdata1 */; + }; + Tokenizer.prototype.stateAfterCdata1 = function (c) { + if (c === 
"]") + this._state = 31 /* AfterCdata2 */; + else + this._state = 29 /* InCdata */; + }; + Tokenizer.prototype.stateAfterCdata2 = function (c) { + if (c === ">") { + // Remove 2 trailing chars + this.cbs.oncdata(this.buffer.substring(this.sectionStart, this._index - 2)); + this._state = 1 /* Text */; + this.sectionStart = this._index + 1; + } + else if (c !== "]") { + this._state = 29 /* InCdata */; + } + // Else: stay in AFTER_CDATA_2 (`]]]>`) + }; + Tokenizer.prototype.stateBeforeSpecialS = function (c) { + if (c === "c" || c === "C") { + this._state = 34 /* BeforeScript1 */; + } + else if (c === "t" || c === "T") { + this._state = 44 /* BeforeStyle1 */; + } + else { + this._state = 3 /* InTagName */; + this._index--; // Consume the token again + } + }; + Tokenizer.prototype.stateBeforeSpecialSEnd = function (c) { + if (this.special === 2 /* Script */ && (c === "c" || c === "C")) { + this._state = 39 /* AfterScript1 */; + } + else if (this.special === 3 /* Style */ && (c === "t" || c === "T")) { + this._state = 48 /* AfterStyle1 */; + } + else + this._state = 1 /* Text */; + }; + Tokenizer.prototype.stateBeforeSpecialLast = function (c, special) { + if (c === "/" || c === ">" || whitespace(c)) { + this.special = special; + } + this._state = 3 /* InTagName */; + this._index--; // Consume the token again + }; + Tokenizer.prototype.stateAfterSpecialLast = function (c, sectionStartOffset) { + if (c === ">" || whitespace(c)) { + this.special = 1 /* None */; + this._state = 6 /* InClosingTagName */; + this.sectionStart = this._index - sectionStartOffset; + this._index--; // Reconsume the token + } + else + this._state = 1 /* Text */; + }; + // For entities terminated with a semicolon + Tokenizer.prototype.parseFixedEntity = function (map) { + if (map === void 0) { map = this.xmlMode ? 
xml_json_1.default : entities_json_1.default; } + // Offset = 1 + if (this.sectionStart + 1 < this._index) { + var entity = this.buffer.substring(this.sectionStart + 1, this._index); + if (Object.prototype.hasOwnProperty.call(map, entity)) { + this.emitPartial(map[entity]); + this.sectionStart = this._index + 1; + } + } + }; + // Parses legacy entities (without trailing semicolon) + Tokenizer.prototype.parseLegacyEntity = function () { + var start = this.sectionStart + 1; + // The max length of legacy entities is 6 + var limit = Math.min(this._index - start, 6); + while (limit >= 2) { + // The min length of legacy entities is 2 + var entity = this.buffer.substr(start, limit); + if (Object.prototype.hasOwnProperty.call(legacy_json_1.default, entity)) { + this.emitPartial(legacy_json_1.default[entity]); + this.sectionStart += limit + 1; + return; + } + limit--; + } + }; + Tokenizer.prototype.stateInNamedEntity = function (c) { + if (c === ";") { + this.parseFixedEntity(); + // Retry as legacy entity if entity wasn't parsed + if (this.baseState === 1 /* Text */ && + this.sectionStart + 1 < this._index && + !this.xmlMode) { + this.parseLegacyEntity(); + } + this._state = this.baseState; + } + else if ((c < "0" || c > "9") && !isASCIIAlpha(c)) { + if (this.xmlMode || this.sectionStart + 1 === this._index) { + // Ignore + } + else if (this.baseState !== 1 /* Text */) { + if (c !== "=") { + // Parse as legacy entity, without allowing additional characters. 
+ this.parseFixedEntity(legacy_json_1.default); + } + } + else { + this.parseLegacyEntity(); + } + this._state = this.baseState; + this._index--; + } + }; + Tokenizer.prototype.decodeNumericEntity = function (offset, base, strict) { + var sectionStart = this.sectionStart + offset; + if (sectionStart !== this._index) { + // Parse entity + var entity = this.buffer.substring(sectionStart, this._index); + var parsed = parseInt(entity, base); + this.emitPartial(decode_codepoint_1.default(parsed)); + this.sectionStart = strict ? this._index + 1 : this._index; + } + this._state = this.baseState; + }; + Tokenizer.prototype.stateInNumericEntity = function (c) { + if (c === ";") { + this.decodeNumericEntity(2, 10, true); + } + else if (c < "0" || c > "9") { + if (!this.xmlMode) { + this.decodeNumericEntity(2, 10, false); + } + else { + this._state = this.baseState; + } + this._index--; + } + }; + Tokenizer.prototype.stateInHexEntity = function (c) { + if (c === ";") { + this.decodeNumericEntity(3, 16, true); + } + else if ((c < "a" || c > "f") && + (c < "A" || c > "F") && + (c < "0" || c > "9")) { + if (!this.xmlMode) { + this.decodeNumericEntity(3, 16, false); + } + else { + this._state = this.baseState; + } + this._index--; + } + }; + Tokenizer.prototype.cleanup = function () { + if (this.sectionStart < 0) { + this.buffer = ""; + this.bufferOffset += this._index; + this._index = 0; + } + else if (this.running) { + if (this._state === 1 /* Text */) { + if (this.sectionStart !== this._index) { + this.cbs.ontext(this.buffer.substr(this.sectionStart)); + } + this.buffer = ""; + this.bufferOffset += this._index; + this._index = 0; + } + else if (this.sectionStart === this._index) { + // The section just started + this.buffer = ""; + this.bufferOffset += this._index; + this._index = 0; + } + else { + // Remove everything unnecessary + this.buffer = this.buffer.substr(this.sectionStart); + this._index -= this.sectionStart; + this.bufferOffset += this.sectionStart; + } + 
this.sectionStart = 0; + } + }; + /** + * Iterates through the buffer, calling the function corresponding to the current state. + * + * States that are more likely to be hit are higher up, as a performance improvement. + */ + Tokenizer.prototype.parse = function () { + while (this._index < this.buffer.length && this.running) { + var c = this.buffer.charAt(this._index); + if (this._state === 1 /* Text */) { + this.stateText(c); + } + else if (this._state === 12 /* InAttributeValueDq */) { + this.stateInAttributeValueDoubleQuotes(c); + } + else if (this._state === 9 /* InAttributeName */) { + this.stateInAttributeName(c); + } + else if (this._state === 19 /* InComment */) { + this.stateInComment(c); + } + else if (this._state === 20 /* InSpecialComment */) { + this.stateInSpecialComment(c); + } + else if (this._state === 8 /* BeforeAttributeName */) { + this.stateBeforeAttributeName(c); + } + else if (this._state === 3 /* InTagName */) { + this.stateInTagName(c); + } + else if (this._state === 6 /* InClosingTagName */) { + this.stateInClosingTagName(c); + } + else if (this._state === 2 /* BeforeTagName */) { + this.stateBeforeTagName(c); + } + else if (this._state === 10 /* AfterAttributeName */) { + this.stateAfterAttributeName(c); + } + else if (this._state === 13 /* InAttributeValueSq */) { + this.stateInAttributeValueSingleQuotes(c); + } + else if (this._state === 11 /* BeforeAttributeValue */) { + this.stateBeforeAttributeValue(c); + } + else if (this._state === 5 /* BeforeClosingTagName */) { + this.stateBeforeClosingTagName(c); + } + else if (this._state === 7 /* AfterClosingTagName */) { + this.stateAfterClosingTagName(c); + } + else if (this._state === 32 /* BeforeSpecialS */) { + this.stateBeforeSpecialS(c); + } + else if (this._state === 21 /* AfterComment1 */) { + this.stateAfterComment1(c); + } + else if (this._state === 14 /* InAttributeValueNq */) { + this.stateInAttributeValueNoQuotes(c); + } + else if (this._state === 4 /* InSelfClosingTag */) { + 
this.stateInSelfClosingTag(c); + } + else if (this._state === 16 /* InDeclaration */) { + this.stateInDeclaration(c); + } + else if (this._state === 15 /* BeforeDeclaration */) { + this.stateBeforeDeclaration(c); + } + else if (this._state === 22 /* AfterComment2 */) { + this.stateAfterComment2(c); + } + else if (this._state === 18 /* BeforeComment */) { + this.stateBeforeComment(c); + } + else if (this._state === 33 /* BeforeSpecialSEnd */) { + this.stateBeforeSpecialSEnd(c); + } + else if (this._state === 53 /* BeforeSpecialTEnd */) { + stateAfterSpecialTEnd(this, c); + } + else if (this._state === 39 /* AfterScript1 */) { + stateAfterScript1(this, c); + } + else if (this._state === 40 /* AfterScript2 */) { + stateAfterScript2(this, c); + } + else if (this._state === 41 /* AfterScript3 */) { + stateAfterScript3(this, c); + } + else if (this._state === 34 /* BeforeScript1 */) { + stateBeforeScript1(this, c); + } + else if (this._state === 35 /* BeforeScript2 */) { + stateBeforeScript2(this, c); + } + else if (this._state === 36 /* BeforeScript3 */) { + stateBeforeScript3(this, c); + } + else if (this._state === 37 /* BeforeScript4 */) { + stateBeforeScript4(this, c); + } + else if (this._state === 38 /* BeforeScript5 */) { + this.stateBeforeSpecialLast(c, 2 /* Script */); + } + else if (this._state === 42 /* AfterScript4 */) { + stateAfterScript4(this, c); + } + else if (this._state === 43 /* AfterScript5 */) { + this.stateAfterSpecialLast(c, 6); + } + else if (this._state === 44 /* BeforeStyle1 */) { + stateBeforeStyle1(this, c); + } + else if (this._state === 29 /* InCdata */) { + this.stateInCdata(c); + } + else if (this._state === 45 /* BeforeStyle2 */) { + stateBeforeStyle2(this, c); + } + else if (this._state === 46 /* BeforeStyle3 */) { + stateBeforeStyle3(this, c); + } + else if (this._state === 47 /* BeforeStyle4 */) { + this.stateBeforeSpecialLast(c, 3 /* Style */); + } + else if (this._state === 48 /* AfterStyle1 */) { + stateAfterStyle1(this, c); + } + 
else if (this._state === 49 /* AfterStyle2 */) { + stateAfterStyle2(this, c); + } + else if (this._state === 50 /* AfterStyle3 */) { + stateAfterStyle3(this, c); + } + else if (this._state === 51 /* AfterStyle4 */) { + this.stateAfterSpecialLast(c, 5); + } + else if (this._state === 52 /* BeforeSpecialT */) { + stateBeforeSpecialT(this, c); + } + else if (this._state === 54 /* BeforeTitle1 */) { + stateBeforeTitle1(this, c); + } + else if (this._state === 55 /* BeforeTitle2 */) { + stateBeforeTitle2(this, c); + } + else if (this._state === 56 /* BeforeTitle3 */) { + stateBeforeTitle3(this, c); + } + else if (this._state === 57 /* BeforeTitle4 */) { + this.stateBeforeSpecialLast(c, 4 /* Title */); + } + else if (this._state === 58 /* AfterTitle1 */) { + stateAfterTitle1(this, c); + } + else if (this._state === 59 /* AfterTitle2 */) { + stateAfterTitle2(this, c); + } + else if (this._state === 60 /* AfterTitle3 */) { + stateAfterTitle3(this, c); + } + else if (this._state === 61 /* AfterTitle4 */) { + this.stateAfterSpecialLast(c, 5); + } + else if (this._state === 17 /* InProcessingInstruction */) { + this.stateInProcessingInstruction(c); + } + else if (this._state === 64 /* InNamedEntity */) { + this.stateInNamedEntity(c); + } + else if (this._state === 23 /* BeforeCdata1 */) { + stateBeforeCdata1(this, c); + } + else if (this._state === 62 /* BeforeEntity */) { + stateBeforeEntity(this, c); + } + else if (this._state === 24 /* BeforeCdata2 */) { + stateBeforeCdata2(this, c); + } + else if (this._state === 25 /* BeforeCdata3 */) { + stateBeforeCdata3(this, c); + } + else if (this._state === 30 /* AfterCdata1 */) { + this.stateAfterCdata1(c); + } + else if (this._state === 31 /* AfterCdata2 */) { + this.stateAfterCdata2(c); + } + else if (this._state === 26 /* BeforeCdata4 */) { + stateBeforeCdata4(this, c); + } + else if (this._state === 27 /* BeforeCdata5 */) { + stateBeforeCdata5(this, c); + } + else if (this._state === 28 /* BeforeCdata6 */) { + 
this.stateBeforeCdata6(c); + } + else if (this._state === 66 /* InHexEntity */) { + this.stateInHexEntity(c); + } + else if (this._state === 65 /* InNumericEntity */) { + this.stateInNumericEntity(c); + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + } + else if (this._state === 63 /* BeforeNumericEntity */) { + stateBeforeNumericEntity(this, c); + } + else { + this.cbs.onerror(Error("unknown _state"), this._state); + } + this._index++; + } + this.cleanup(); + }; + Tokenizer.prototype.finish = function () { + // If there is remaining data, emit it in a reasonable way + if (this.sectionStart < this._index) { + this.handleTrailingData(); + } + this.cbs.onend(); + }; + Tokenizer.prototype.handleTrailingData = function () { + var data = this.buffer.substr(this.sectionStart); + if (this._state === 29 /* InCdata */ || + this._state === 30 /* AfterCdata1 */ || + this._state === 31 /* AfterCdata2 */) { + this.cbs.oncdata(data); + } + else if (this._state === 19 /* InComment */ || + this._state === 21 /* AfterComment1 */ || + this._state === 22 /* AfterComment2 */) { + this.cbs.oncomment(data); + } + else if (this._state === 64 /* InNamedEntity */ && !this.xmlMode) { + this.parseLegacyEntity(); + if (this.sectionStart < this._index) { + this._state = this.baseState; + this.handleTrailingData(); + } + } + else if (this._state === 65 /* InNumericEntity */ && !this.xmlMode) { + this.decodeNumericEntity(2, 10, false); + if (this.sectionStart < this._index) { + this._state = this.baseState; + this.handleTrailingData(); + } + } + else if (this._state === 66 /* InHexEntity */ && !this.xmlMode) { + this.decodeNumericEntity(3, 16, false); + if (this.sectionStart < this._index) { + this._state = this.baseState; + this.handleTrailingData(); + } + } + else if (this._state !== 3 /* InTagName */ && + this._state !== 8 /* BeforeAttributeName */ && + this._state !== 11 /* BeforeAttributeValue */ && + this._state !== 10 /* AfterAttributeName */ && + this._state !== 
9 /* InAttributeName */ && + this._state !== 13 /* InAttributeValueSq */ && + this._state !== 12 /* InAttributeValueDq */ && + this._state !== 14 /* InAttributeValueNq */ && + this._state !== 6 /* InClosingTagName */) { + this.cbs.ontext(data); + } + /* + * Else, ignore remaining data + * TODO add a way to remove current tag + */ + }; + Tokenizer.prototype.getSection = function () { + return this.buffer.substring(this.sectionStart, this._index); + }; + Tokenizer.prototype.emitToken = function (name) { + this.cbs[name](this.getSection()); + this.sectionStart = -1; + }; + Tokenizer.prototype.emitPartial = function (value) { + if (this.baseState !== 1 /* Text */) { + this.cbs.onattribdata(value); // TODO implement the new event + } + else { + this.cbs.ontext(value); + } + }; + return Tokenizer; +}()); +exports.default = Tokenizer; diff --git a/node_modules/htmlparser2/lib/WritableStream.d.ts b/node_modules/htmlparser2/lib/WritableStream.d.ts new file mode 100644 index 0000000..0755f99 --- /dev/null +++ b/node_modules/htmlparser2/lib/WritableStream.d.ts @@ -0,0 +1,16 @@ +/// <reference types="node" /> +import { Handler, ParserOptions } from "./Parser"; +import { Writable } from "stream"; +/** + * WritableStream makes the `Parser` interface available as a NodeJS stream. + * + * @see Parser + */ +export declare class WritableStream extends Writable { + private readonly _parser; + private readonly _decoder; + constructor(cbs: Partial<Handler>, options?: ParserOptions); + _write(chunk: string | Buffer, encoding: string, cb: () => void): void; + _final(cb: () => void): void; +} +//# sourceMappingURL=WritableStream.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/WritableStream.d.ts.map b/node_modules/htmlparser2/lib/WritableStream.d.ts.map new file mode 100644 index 0000000..31295a2 --- /dev/null +++ b/node_modules/htmlparser2/lib/WritableStream.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"WritableStream.d.ts","sourceRoot":"","sources":["../src/WritableStream.ts"],"names":[],"mappings":";AAAA,OAAO,EAAU,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAK1D,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAQlC;;;;GAIG;AACH,qBAAa,cAAe,SAAQ,QAAQ;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuB;gBAEpC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,EAAE,aAAa;IAK1D,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,IAAI,GAAG,IAAI;IAOtE,MAAM,CAAC,EAAE,EAAE,MAAM,IAAI,GAAG,IAAI;CAI/B"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/WritableStream.js b/node_modules/htmlparser2/lib/WritableStream.js new file mode 100644 index 0000000..ff87995 --- /dev/null +++ b/node_modules/htmlparser2/lib/WritableStream.js @@ -0,0 +1,53 @@ +"use strict"; +var __extends = (this && this.__extends) || (function () { + var extendStatics = function (d, b) { + extendStatics = Object.setPrototypeOf || + ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || + function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; + return extendStatics(d, b); + }; + return function (d, b) { + if (typeof b !== "function" && b !== null) + throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); + extendStatics(d, b); + function __() { this.constructor = d; } + d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); + }; +})(); +Object.defineProperty(exports, "__esModule", { value: true }); +exports.WritableStream = void 0; +var Parser_1 = require("./Parser"); +/* + * NOTE: If either of these two imports produces a type error, + * please update your @types/node dependency! + */ +var stream_1 = require("stream"); +var string_decoder_1 = require("string_decoder"); +// Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream +function isBuffer(_chunk, encoding) { + return encoding === "buffer"; +} +/** + * WritableStream makes the `Parser` interface available as a NodeJS stream. 
+ * + * @see Parser + */ +var WritableStream = /** @class */ (function (_super) { + __extends(WritableStream, _super); + function WritableStream(cbs, options) { + var _this = _super.call(this, { decodeStrings: false }) || this; + _this._decoder = new string_decoder_1.StringDecoder(); + _this._parser = new Parser_1.Parser(cbs, options); + return _this; + } + WritableStream.prototype._write = function (chunk, encoding, cb) { + this._parser.write(isBuffer(chunk, encoding) ? this._decoder.write(chunk) : chunk); + cb(); + }; + WritableStream.prototype._final = function (cb) { + this._parser.end(this._decoder.end()); + cb(); + }; + return WritableStream; +}(stream_1.Writable)); +exports.WritableStream = WritableStream; diff --git a/node_modules/htmlparser2/lib/index.d.ts b/node_modules/htmlparser2/lib/index.d.ts new file mode 100644 index 0000000..997c6d9 --- /dev/null +++ b/node_modules/htmlparser2/lib/index.d.ts @@ -0,0 +1,39 @@ +import { Parser, ParserOptions } from "./Parser"; +export { Parser, ParserOptions }; +import { DomHandler, DomHandlerOptions, Node, Element, Document } from "domhandler"; +export { DomHandler, DomHandlerOptions }; +declare type Options = ParserOptions & DomHandlerOptions; +/** + * Parses the data, returns the resulting document. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + */ +export declare function parseDocument(data: string, options?: Options): Document; +/** + * Parses data, returns an array of the root nodes. + * + * Note that the root nodes still have a `Document` node as their parent. + * Use `parseDocument` to get the `Document` node instead. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + * @deprecated Use `parseDocument` instead. + */ +export declare function parseDOM(data: string, options?: Options): Node[]; +/** + * Creates a parser instance, with an attached DOM handler. 
+ * + * @param cb A callback that will be called once parsing has been completed. + * @param options Optional options for the parser and DOM builder. + * @param elementCb An optional callback that will be called every time a tag has been completed inside of the DOM. + */ +export declare function createDomStream(cb: (error: Error | null, dom: Node[]) => void, options?: Options, elementCb?: (element: Element) => void): Parser; +export { default as Tokenizer, Callbacks as TokenizerCallbacks, } from "./Tokenizer"; +import * as ElementType from "domelementtype"; +export { ElementType }; +export * from "./FeedHandler"; +export * as DomUtils from "domutils"; +export { DomHandler as DefaultHandler }; +export { FeedHandler as RssHandler } from "./FeedHandler"; +//# sourceMappingURL=index.d.ts.map
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/index.d.ts.map b/node_modules/htmlparser2/lib/index.d.ts.map new file mode 100644 index 0000000..058fcba --- /dev/null +++ b/node_modules/htmlparser2/lib/index.d.ts.map @@ -0,0 +1 @@ +{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACjD,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AAEjC,OAAO,EACH,UAAU,EACV,iBAAiB,EACjB,IAAI,EACJ,OAAO,EACP,QAAQ,EACX,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,CAAC;AAEzC,aAAK,OAAO,GAAG,aAAa,GAAG,iBAAiB,CAAC;AAIjD;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,QAAQ,CAIvE;AACD;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,IAAI,EAAE,CAEhE;AACD;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC3B,EAAE,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,IAAI,EAC9C,OAAO,CAAC,EAAE,OAAO,EACjB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,GACvC,MAAM,CAGR;AAED,OAAO,EACH,OAAO,IAAI,SAAS,EACpB,SAAS,IAAI,kBAAkB,GAClC,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,WAAW,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,CAAC;AAOvB,cAAc,eAAe,CAAC;AAC9B,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC;AAGrC,OAAO,EAAE,UAAU,IAAI,cAAc,EAAE,CAAC;AACxC,OAAO,EAAE,WAAW,IAAI,UAAU,EAAE,MAAM,eAAe,CAAC"}
\ No newline at end of file diff --git a/node_modules/htmlparser2/lib/index.js b/node_modules/htmlparser2/lib/index.js new file mode 100644 index 0000000..3e461fa --- /dev/null +++ b/node_modules/htmlparser2/lib/index.js @@ -0,0 +1,84 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; +var __exportStar = (this && this.__exportStar) || function(m, exports) { + for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.RssHandler = exports.DefaultHandler = exports.DomUtils = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DomHandler = exports.Parser = void 0; +var Parser_1 = require("./Parser"); +Object.defineProperty(exports, "Parser", { enumerable: true, get: function () { return Parser_1.Parser; } }); +var domhandler_1 = require("domhandler"); +Object.defineProperty(exports, "DomHandler", { enumerable: true, get: function () { return domhandler_1.DomHandler; } }); +Object.defineProperty(exports, "DefaultHandler", { enumerable: true, get: function () { return domhandler_1.DomHandler; } }); +// Helper methods +/** + * Parses the data, returns the resulting document. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + */ +function parseDocument(data, options) { + var handler = new domhandler_1.DomHandler(undefined, options); + new Parser_1.Parser(handler, options).end(data); + return handler.root; +} +exports.parseDocument = parseDocument; +/** + * Parses data, returns an array of the root nodes. + * + * Note that the root nodes still have a `Document` node as their parent. + * Use `parseDocument` to get the `Document` node instead. + * + * @param data The data that should be parsed. + * @param options Optional options for the parser and DOM builder. + * @deprecated Use `parseDocument` instead. + */ +function parseDOM(data, options) { + return parseDocument(data, options).children; +} +exports.parseDOM = parseDOM; +/** + * Creates a parser instance, with an attached DOM handler. + * + * @param cb A callback that will be called once parsing has been completed. + * @param options Optional options for the parser and DOM builder. + * @param elementCb An optional callback that will be called every time a tag has been completed inside of the DOM. 
+ */ +function createDomStream(cb, options, elementCb) { + var handler = new domhandler_1.DomHandler(cb, options, elementCb); + return new Parser_1.Parser(handler, options); +} +exports.createDomStream = createDomStream; +var Tokenizer_1 = require("./Tokenizer"); +Object.defineProperty(exports, "Tokenizer", { enumerable: true, get: function () { return __importDefault(Tokenizer_1).default; } }); +var ElementType = __importStar(require("domelementtype")); +exports.ElementType = ElementType; +/* + * All of the following exports exist for backwards-compatibility. + * They should probably be removed eventually. + */ +__exportStar(require("./FeedHandler"), exports); +exports.DomUtils = __importStar(require("domutils")); +var FeedHandler_1 = require("./FeedHandler"); +Object.defineProperty(exports, "RssHandler", { enumerable: true, get: function () { return FeedHandler_1.FeedHandler; } }); diff --git a/node_modules/htmlparser2/package.json b/node_modules/htmlparser2/package.json new file mode 100644 index 0000000..9b3a636 --- /dev/null +++ b/node_modules/htmlparser2/package.json @@ -0,0 +1,74 @@ +{ + "name": "htmlparser2", + "description": "Fast & forgiving HTML/XML parser", + "version": "6.1.0", + "author": "Felix Boehm <me@feedic.com>", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "sideEffects": false, + "keywords": [ + "html", + "parser", + "streams", + "xml", + "dom", + "rss", + "feed", + "atom" + ], + "repository": { + "type": "git", + "url": "git://github.com/fb55/htmlparser2.git" + }, + "directories": { + "lib": "lib/" + }, + "main": "lib/index.js", + "types": "lib/index.d.ts", + "files": [ + "lib/**/*" + ], + "scripts": { + "test": "jest --coverage", + "lint": "npm run lint:es && npm run lint:prettier", + "lint:es": "eslint src", + "lint:prettier": "npm run format:prettier:raw -- --check", + "format": "npm run format:es && npm run 
format:prettier", + "format:es": "npm run lint:es -- --fix", + "format:prettier": "npm run format:prettier:raw -- --write", + "format:prettier:raw": "prettier '**/*.{ts,md,json,yml}'", + "build": "tsc", + "prepare": "npm run build" + }, + "dependencies": { + "domelementtype": "^2.0.1", + "domhandler": "^4.0.0", + "domutils": "^2.5.2", + "entities": "^2.0.0" + }, + "devDependencies": { + "@types/jest": "^26.0.0", + "@types/node": "^14.0.5", + "@typescript-eslint/eslint-plugin": "^4.9.1", + "@typescript-eslint/parser": "^4.9.1", + "eslint": "^7.15.0", + "eslint-config-prettier": "^8.1.0", + "jest": "^26.0.1", + "prettier": "^2.1.1", + "ts-jest": "^26.0.0", + "typescript": "^4.0.2" + }, + "jest": { + "preset": "ts-jest", + "testEnvironment": "node" + }, + "prettier": { + "tabWidth": 4 + } +} |