1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const extract_1 = require("./extract");
const request_1 = require("./request");
const utils = require("./utils");
/**
 * Splits the caller's options, obtains the page HTML (either supplied directly
 * via `html` or fetched from `url`) and runs the Open Graph extractor on it.
 *
 * @param {object} options - options for ogs; split into scraper options
 *   (`ogsOptions`) and request options (`gotOptions`) by `utils.optionSetupAndSplit`
 * @return {Promise<object>} resolves to `{ ogObject, response }`, where `ogObject`
 *   is the extractor result with `requestUrl` and `success: true` added. When
 *   `html` was supplied, `requestUrl` is `null` and `response` is `{ body: html }`.
 * @throws {Error} 'Must specify either `url` or `html`, not both', 'Invalid URL',
 *   'Must scrape an HTML page', 'Host name has been black listed', or one of the
 *   request-failure errors produced by `toScraperError`.
 */
async function setOptionsAndReturnOpenGraphResults(options) {
  const { ogsOptions, gotOptions } = utils.optionSetupAndSplit(options);

  // Caller supplied the markup directly: no network request is made.
  if (ogsOptions.html) {
    if (ogsOptions.url) {
      throw new Error('Must specify either `url` or `html`, not both');
    }
    const ogObject = (0, extract_1.default)(ogsOptions.html, ogsOptions, null);
    ogObject.requestUrl = null;
    ogObject.success = true;
    return { ogObject, response: { body: ogsOptions.html } };
  }

  const formattedUrl = utils.validateAndFormatURL(ogsOptions.url, ogsOptions.urlValidatorSettings);
  if (!formattedUrl.url) {
    throw new Error('Invalid URL');
  }
  // Both the scraper options and the request options use the formatted URL.
  ogsOptions.url = formattedUrl.url;
  gotOptions.url = formattedUrl.url;

  // trying to limit non html pages
  if (utils.isThisANonHTMLUrl(ogsOptions.url)) {
    throw new Error('Must scrape an HTML page');
  }

  // NOTE: each blacklist entry is matched as a substring of the whole URL,
  // so an entry also matches when it appears in the path or query string.
  const isBlacklisted = Boolean(ogsOptions.blacklist)
    && ogsOptions.blacklist.some((blacklistedHostname) => ogsOptions.url.includes(blacklistedHostname));
  if (isBlacklisted) {
    throw new Error('Host name has been black listed');
  }

  try {
    const { decodedBody, response } = await (0, request_1.default)(gotOptions, ogsOptions);
    const ogObject = (0, extract_1.default)(decodedBody, ogsOptions, response.rawBody);
    ogObject.requestUrl = ogsOptions.url;
    ogObject.success = true;
    return { ogObject, response };
  } catch (exception) {
    throw toScraperError(exception, ogsOptions.downloadLimit);
  }
}

/**
 * Maps a failure raised while requesting or extracting a page to the error
 * that is reported to the caller.
 *
 * Translated errors keep the original failure in `cause`, so the underlying
 * code and stack stay available for debugging. Unrecognised `Error` instances
 * are returned untouched.
 *
 * @param {*} exception - whatever was caught around the request/extract step
 * @param {*} downloadLimit - value interpolated into the download-limit message
 * @return {Error} the error to throw
 */
function toScraperError(exception, downloadLimit) {
  const notFoundCodes = ['ENOTFOUND', 'EHOSTUNREACH', 'ENETUNREACH', 'ERR_INVALID_URL', 'EINVAL'];

  if (exception) {
    const { code, message } = exception;
    if (notFoundCodes.includes(code)) {
      return new Error('Page not found', { cause: exception });
    }
    if (code === 'ETIMEDOUT') {
      return new Error('Time out', { cause: exception });
    }
    // Guard the type so a non-string `message` cannot raise a TypeError here.
    if (typeof message === 'string') {
      if (message.startsWith('Response code 5')) {
        return new Error('Web server is returning error', { cause: exception });
      }
      if (message === 'Promise was canceled') {
        return new Error(`Exceeded the download limit of ${downloadLimit} bytes`, { cause: exception });
      }
    }
  }

  if (exception instanceof Error) {
    return exception;
  }
  // Anything that is not an Error (string, plain object, undefined, ...) is
  // reported with the generic message.
  return new Error('Page not found', { cause: exception });
}
// Publish the scraper entry point as this module's default export.
exports.default = setOptionsAndReturnOpenGraphResults;
|