about | summary | refs | log | tree | commit | diff
path: root/update/ponies
diff options
context:
space:
mode:
author Minteck <contact@minteck.org> 2022-02-13 16:16:18 +0100
committer Minteck <contact@minteck.org> 2022-02-13 16:16:18 +0100
commit 327119b4d1c2248b8a075cad3cd05ab92560e75d (patch)
tree ce143399e74c120f7311e75490efd0defd9b58d8 /update/ponies
parent ede8d0750f3f16e3ba5c3c3f716c98d267512b09 (diff)
download ponyfind-327119b4d1c2248b8a075cad3cd05ab92560e75d.tar.gz
ponyfind-327119b4d1c2248b8a075cad3cd05ab92560e75d.tar.bz2
ponyfind-327119b4d1c2248b8a075cad3cd05ab92560e75d.zip
Feature: implements #8, voids #9
Diffstat (limited to 'update/ponies')
-rw-r--r-- update/ponies/dict.js 41
-rw-r--r-- update/ponies/index.js 5
-rw-r--r-- update/ponies/infobox.js 71
-rw-r--r-- update/ponies/listgen.js 24
-rw-r--r-- update/ponies/pages.js 28
-rw-r--r-- update/ponies/parse.js 115
6 files changed, 284 insertions, 0 deletions
diff --git a/update/ponies/dict.js b/update/ponies/dict.js
new file mode 100644
index 0000000..88ec568
--- /dev/null
+++ b/update/ponies/dict.js
@@ -0,0 +1,41 @@
+/**
+ * Builds ./data/search.json from ./data/pages.json and ./data/data.json.
+ *
+ * Every page name, and every name listed under `names` for that page in
+ * data.json, is normalized and stored as an association
+ * `{ title: <normalized name>, endpoint: <page name> }`. `entries` holds the
+ * unique normalized titles. All paths are relative to the working directory.
+ */
+const fs = require('fs');
+
+console.log("Optimizing search engine...");
+
+/**
+ * Normalizes a name for lookup: lowercases it, turns every character outside
+ * a-z into a space, collapses whitespace runs and trims both ends.
+ * @param {string} name
+ * @returns {string}
+ */
+function normalize(name) {
+    return name.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim();
+}
+
+// Both inputs are parsed once up front instead of on every loop iteration.
+const pages = JSON.parse(fs.readFileSync("./data/pages.json").toString());
+const data = JSON.parse(fs.readFileSync("./data/data.json").toString());
+
+let search = {
+    entries: null,
+    associations: []
+};
+
+// Still exposed on `global` as before; `seen` mirrors it for O(1) membership checks.
+global.knownAssociations = [];
+const seen = new Set();
+
+/**
+ * Registers `name` as a search title that resolves to `endpoint`, unless its
+ * normalized form is already registered (the first registration wins).
+ * @param {string} name
+ * @param {string} endpoint
+ */
+function associate(name, endpoint) {
+    const title = normalize(name);
+    if (seen.has(title)) return;
+    seen.add(title);
+    knownAssociations.push(title);
+    search.associations.push({ title, endpoint });
+}
+
+for (const page of pages) {
+    associate(page.name, page.name);
+
+    // Pages without a parsed record in data.json only get their own name.
+    const record = data[page.name];
+    if (typeof record !== "undefined") {
+        for (const nick of record.names) {
+            associate(nick, page.name);
+        }
+    }
+}
+
+search.entries = [...new Set(search.associations.map((association) => association.title))];
+fs.writeFileSync("./data/search.json", JSON.stringify(search, null, 4));
+
+console.log(pages.length + " known characters");
\ No newline at end of file
diff --git a/update/ponies/index.js b/update/ponies/index.js
new file mode 100644
index 0000000..53acf34
--- /dev/null
+++ b/update/ponies/index.js
@@ -0,0 +1,5 @@
+/**
+ * Runs the pony update pipeline: each step is a sibling script executed in its
+ * own `node` process, sharing this process' stdio.
+ */
+const path = require('path');
+const { spawnSync } = require('child_process');
+
+// Order matters: each script reads the JSON file written by the one before it.
+const steps = ["listgen.js", "pages.js", "infobox.js", "parse.js", "dict.js"];
+
+for (const step of steps) {
+    // The script is resolved next to this file. The former relative argument
+    // ("update/<step>") was looked up from the cwd below (the parent of this
+    // directory) and therefore pointed at <parent>/update/<step>.
+    // NOTE(review): the steps resolve "./data" and "./modules" against this cwd;
+    // confirm the parent directory is where those are expected to live.
+    const result = spawnSync("node", [path.join(__dirname, step)], { cwd: __dirname + "/..", stdio: "inherit" });
+
+    // Keep running the remaining steps as before, but report failures through
+    // the exit code instead of dropping them.
+    if (result.error) {
+        console.error(result.error);
+        process.exitCode = 1;
+    } else if (result.status !== 0) {
+        // `status` is null when the child was terminated by a signal.
+        process.exitCode = result.status ?? 1;
+    }
+}
\ No newline at end of file
diff --git a/update/ponies/infobox.js b/update/ponies/infobox.js
new file mode 100644
index 0000000..f53be0e
--- /dev/null
+++ b/update/ponies/infobox.js
@@ -0,0 +1,71 @@
+/**
+ * For every page in ./data/pages.json, downloads the page's wikitext and plain
+ * text extracts from mlp.fandom.com and stores the values of its
+ * "Infobox character" template in ./data/boxes.json, together with:
+ *   _extract     a short summary built from the intro extract
+ *   _extract_fr  the summary passed through ./modules/translate.php, if present
+ *   _gen         4, 5 or -1 depending on keywords found in the full page text
+ */
+const fs = require('fs');
+const axios = require("axios");
+const WikiTextParser = require('parse-wikitext');
+const parser = new WikiTextParser("mlp.fandom.com");
+
+console.log("Gathering infobox for each page...");
+
+/**
+ * Returns the first entry of `response.query.pages` (an object keyed by page
+ * id), or undefined when the response carries no pages.
+ * @param {object} response
+ * @returns {object|undefined}
+ */
+function firstPage(response) {
+    const pagesById = response?.query?.pages;
+    if (!pagesById) return undefined;
+    return pagesById[Object.keys(pagesById)[0]];
+}
+
+/**
+ * Builds the summary from an intro extract: text preceding line breaks is
+ * discarded, the rest is split into sentences (at ".", "?" or "!" followed by
+ * a capital letter) and up to three sentences are joined while the result is
+ * shorter than 150 characters.
+ * @param {string} intro
+ * @returns {string}
+ */
+function summarize(intro) {
+    const sentences = intro.trim().replace(/(.*)\n(.*)/, "$2").replace(/(.*)\n\n(.*)/gm, "$2").replace(/([.?!])\s*(?=[A-Z])/g, "$1|").split("|");
+    let summary = sentences[0];
+    if (summary.length < 150 && sentences.length > 1) {
+        summary = sentences[0] + " " + sentences[1];
+        if (summary.length < 150 && sentences.length > 2) {
+            summary = sentences[0] + " " + sentences[1] + " " + sentences[2];
+        }
+    }
+    return summary;
+}
+
+/**
+ * Passes `text` to ./modules/translate.php when that script exists and returns
+ * its standard output; returns `text` unchanged when the script is missing or
+ * the call throws.
+ * @param {string} text
+ * @returns {string}
+ */
+function translate(text) {
+    if (!fs.existsSync("./modules/translate.php")) return text;
+    try {
+        return require('child_process').spawnSync("php", [ "translate.php", text ], { cwd: "./modules" }).stdout.toString();
+    } catch (e) {
+        // Best effort: e.g. `stdout` is null when php could not be started.
+        return text;
+    }
+}
+
+/**
+ * Guesses the generation from the full page text: 4 when it mentions
+ * "friendship is magic" or contains "fim", 5 when it mentions
+ * "a new generation" or contains the standalone word "ang", -1 otherwise.
+ * @param {string} fullText
+ * @returns {number}
+ */
+function detectGeneration(fullText) {
+    const lower = fullText.toLowerCase();
+    if (lower.includes("friendship is magic") || lower.includes("fim")) {
+        return 4;
+    }
+    const words = fullText.replace(/[.,?!;()"'-]/g, " ").replace(/\s+/g, " ").toLowerCase().split(" ");
+    if (lower.includes("a new generation") || words.includes("ang")) {
+        return 5;
+    }
+    return -1;
+}
+
+async function main() {
+    let infoboxes = {};
+    for (const page of JSON.parse(fs.readFileSync("./data/pages.json").toString())) {
+        console.log("Gathering infobox for '" + page.name + "'...");
+        try {
+            // encodeURIComponent also escapes "&", "#", "+" and "=", which would
+            // otherwise end or alter the `titles` query parameter.
+            const title = encodeURIComponent(page.name);
+            const data = (await axios.get("https://mlp.fandom.com/api.php?action=query&prop=revisions&titles=" + title + "&rvslots=*&rvprop=content&formatversion=2&format=json")).data;
+            const mwextracts = (await axios.get("https://mlp.fandom.com/api.php?format=json&action=query&prop=extracts&exlimit=max&explaintext&exintro&titles=" + title + "&redirects=")).data;
+            const mwtext = (await axios.get("https://mlp.fandom.com/api.php?format=json&action=query&prop=extracts&exlimit=max&explaintext&titles=" + title + "&redirects=")).data;
+
+            let extracts = "";
+            try {
+                extracts = summarize(firstPage(mwextracts).extract);
+            } catch (e) {
+                // No usable intro extract for this page: keep an empty summary.
+                extracts = "";
+            }
+            const extracts_fr = translate(extracts);
+
+            if (data.query.pages.length > 0) {
+                console.log("Results found, adding name to database")
+                const sections = parser.pageToSectionObject(data.query.pages[0].revisions[0].slots.main.content);
+                const box = parser.parseInfoBox(sections["content"]);
+                if (box.template === "Infobox character") {
+                    infoboxes[page.name] = box.values;
+                    infoboxes[page.name]["_extract"] = extracts;
+                    infoboxes[page.name]["_extract_fr"] = extracts_fr;
+                }
+            } else {
+                console.log("No results found, ignoring name");
+            }
+
+            // Only pages that produced an infobox get a generation; previously the
+            // assignment threw on the missing entry and the error was swallowed.
+            const fullText = firstPage(mwtext)?.extract;
+            if (typeof infoboxes[page.name] !== "undefined" && typeof fullText === "string") {
+                infoboxes[page.name]["_gen"] = detectGeneration(fullText);
+            }
+        } catch (e) {
+            // A failing page is logged and skipped so the other pages still get processed.
+            console.error(e);
+        }
+    }
+
+    fs.writeFileSync("./data/boxes.json", JSON.stringify(infoboxes, null, 4))
+}
+
+main().catch((e) => {
+    console.error(e);
+    process.exitCode = 1;
+});
\ No newline at end of file
diff --git a/update/ponies/listgen.js b/update/ponies/listgen.js
new file mode 100644
index 0000000..d9401f8
--- /dev/null
+++ b/update/ponies/listgen.js
@@ -0,0 +1,24 @@
+/**
+ * Recreates ./data from scratch and writes ./data/list.json: the unique page
+ * titles found in a fixed set of mlp.fandom.com categories, minus titles that
+ * look like lists, groups or disambiguated pages.
+ */
+const axios = require("axios");
+const fs = require("fs");
+
+// Start from an empty ./data directory: anything in it is deleted first.
+if (fs.existsSync("./data")) fs.rmSync("./data", { recursive: true });
+fs.mkdirSync("./data");
+
+/**
+ * Fetches the members of a wiki category and returns the titles that pass the
+ * name filter.
+ * @param {string} category Category name without the "Category:" prefix.
+ * @returns {Promise<string[]>} Filtered member titles; empty when the response
+ *   contains no pages.
+ */
+async function getCategory(category) {
+    console.log("Category:" + category);
+    const cat = (await axios.get("https://mlp.fandom.com/api.php?action=query&generator=categorymembers&gcmtitle=Category:" + encodeURIComponent(category) + "&prop=categories&cllimit=max&gcmlimit=max&format=json")).data;
+
+    // A response without `query.pages` is treated as an empty category instead
+    // of crashing on the property access.
+    const pages = cat.query?.pages ?? {};
+
+    return Object.values(pages)
+        .map((page) => page.title)
+        .filter((title) => !title.startsWith("List")
+            && !title.includes("EG")
+            && !title.toLowerCase().includes("ponies")
+            && !title.includes(" and ")
+            && !title.includes("(")
+            && !title.includes("family"));
+}
+
+async function main() {
+    // Categories are fetched one after another; the Set drops titles that
+    // appear in more than one category while keeping first-seen order.
+    const list = [...new Set([
+        ...(await getCategory("Pegasus ponies")),
+        ...(await getCategory("Alicorn ponies")),
+        ...(await getCategory("Earth ponies")),
+        ...(await getCategory("Unicorn ponies")),
+        ...(await getCategory("Main characters")),
+        ...(await getCategory("Dragons")),
+    ])];
+    fs.writeFileSync("./data/list.json", JSON.stringify(list, null, 4))
+}
+
+main().catch((e) => {
+    console.error(e);
+    process.exitCode = 1;
+});
\ No newline at end of file
diff --git a/update/ponies/pages.js b/update/ponies/pages.js
new file mode 100644
index 0000000..7f3a1c8
--- /dev/null
+++ b/update/ponies/pages.js
@@ -0,0 +1,28 @@
+/**
+ * Resolves every title in ./data/list.json to a wiki page through the
+ * mlp.fandom.com search API and writes the matches to ./data/pages.json as
+ * `{ query, name, mwid, words }` records.
+ */
+const fs = require('fs');
+const axios = require('axios');
+
+async function main() {
+    console.log("Gathering pages list...");
+    let pages = [];
+    for (const page of JSON.parse(fs.readFileSync("./data/list.json").toString())) {
+        console.log("Searching for '" + page + "'...");
+        try {
+            // encodeURIComponent (not encodeURI) so that "&", "#", "+" and "=" in a
+            // title stay inside the `srsearch` value.
+            const data = (await axios.get("https://mlp.fandom.com/api.php?action=query&list=search&srsearch=" + encodeURIComponent(page) + "&srlimit=1&srenablerewrites=true&format=json")).data;
+            if (data.query.search.length > 0) {
+                console.log("Results found, adding name to database")
+                // Only the top hit is requested (srlimit=1) and kept.
+                const hit = data.query.search[0];
+                pages.push({
+                    query: page,
+                    name: hit.title,
+                    mwid: hit.pageid,
+                    words: hit.wordcount,
+                })
+            } else {
+                console.log("No results found, ignoring name");
+            }
+        } catch (e) {
+            // A failing lookup is logged and skipped so the other titles still get processed.
+            console.error(e);
+        }
+    }
+
+    fs.writeFileSync("./data/pages.json", JSON.stringify(pages, null, 4))
+}
+
+main().catch((e) => {
+    console.error(e);
+    process.exitCode = 1;
+});
\ No newline at end of file
diff --git a/update/ponies/parse.js b/update/ponies/parse.js
new file mode 100644
index 0000000..bd06081
--- /dev/null
+++ b/update/ponies/parse.js
@@ -0,0 +1,115 @@
+/**
+ * Converts the raw infobox values in ./data/boxes.json into normalized
+ * character records and writes them to ./data/data.json, keyed by page title.
+ */
+const fs = require('fs');
+const axios = require('axios');
+
+console.log("Parsing infobox data...");
+
+let ponies = {};
+
+/**
+ * Removes HTML comments (`<!-- ... -->`) from an infobox value.
+ * @param {string} text
+ * @returns {string}
+ */
+function stripComments(text) {
+    return text.replace(/<!--[\s\S]*?-->/g, "");
+}
+
+/**
+ * Resolves a wiki file name to a URL by sending a HEAD request to
+ * Special:FilePath and reading the response URL of the underlying request.
+ * When the request itself fails, the response URL attached to the error is
+ * used instead.
+ * @param {string} fileName
+ * @returns {Promise<string>}
+ * @throws The original request error when it carries no response to read from.
+ */
+async function resolveFileUrl(fileName) {
+    try {
+        return (await axios.head("https://mlp.fandom.com/Special:FilePath/" + encodeURI(fileName).replaceAll("?", "%3F").replaceAll("&", "%26"))).request.res.responseUrl;
+    } catch (e) {
+        try {
+            return e.request.res.responseUrl;
+        } catch (e2) {
+            console.error(e2);
+            throw e;
+        }
+    }
+}
+
+/**
+ * Splits a multi-valued infobox field (occupation, residence) into entries.
+ * Pipes are first turned into underscores so they cannot be confused with the
+ * separator, `<br>`-style tags become the separator, and each entry is then
+ * stripped of square brackets and tags; where an entry contains an underscore,
+ * the text after it is kept.
+ * @param {string} raw
+ * @returns {string[]}
+ */
+function parseList(raw) {
+    return stripComments(raw).trim()
+        .replace(/\|/gm, "_")
+        .replace(/<( ||(|| )\/)( ||(|| )\/)(b|B)(r|R)( ||(|| )\/)( ||(|| )\/)>/gm, "|")
+        .replace(/( \|| \| | \| )/gm, "|")
+        .split("|")
+        .map((e) => e.trim().replace(/[\[\]]/gm, "").replace(/<(.*)>/gm, "").replace(/([a-zA-Z0-9 \-_,.'"]*)_([a-zA-Z0-9 \-_,.'"]*)/gm, "$2"));
+}
+
+/**
+ * Builds the normalized record for one infobox.
+ * @param {string} title Page title; always the first entry of `names`.
+ * @param {object} box Raw infobox values from boxes.json.
+ * @returns {Promise<object>}
+ */
+async function parseBox(title, box) {
+    const data = {
+        names: [title],
+        extract: "",
+        extract_fr: "",
+        generation: -1,
+        color: "000000",
+        image: "https://example.com",
+        kind: "Pony",
+        sex: "Unknown",
+        occupation: ["Unknown"],
+        residence: ["Unknown"],
+        mark: "https://example.com"
+    }
+
+    if (typeof box._gen !== "undefined") data.generation = box._gen;
+
+    for (const key of ["name2", "name3", "name4", "name5"]) {
+        if (typeof box[key] !== "undefined") data.names.push(stripComments(box[key]));
+    }
+
+    if (typeof box.nicknames !== "undefined") {
+        // Nicknames containing anything but letters, digits, "-", "_" and spaces are dropped.
+        for (const nick of stripComments(box.nicknames).split(",").filter((e) => !e.match(/[^a-zA-Z0-9-_ ]/gm))) {
+            data.names.push(nick.trim());
+        }
+    }
+
+    if (typeof box.kind !== "undefined") {
+        // Take the first space-separated word and keep only its last capitalized segment.
+        const kp = stripComments(box.kind).replace(/[^a-zA-Z0-9-_ ]/gm, "").split(" ")[0];
+        data.kind = kp.substring(kp.replace(/([A-Z])([a-z0-9]*)$/g, "").length);
+    }
+    if (typeof box.sex !== "undefined") data.sex = stripComments(box.sex).trim().startsWith("F") ? "F" : "M";
+    if (typeof box._extract !== "undefined") {
+        data.extract = box._extract;
+    }
+    // infobox.js stores the translated summary under `_extract_fr`; it was never copied over.
+    if (typeof box._extract_fr !== "undefined") {
+        data.extract_fr = box._extract_fr;
+    }
+    if (typeof box.coat !== "undefined") data.color = stripComments(box.coat).trim().replace(/\[([a-z.\/ \nA-Z0-9:]*)\/(.{6})\/ (.*)\]/gm, "$2").replace(/{{perbang\|([0-9A-Fa-f].{5})(.*)/g, "$1");
+
+    // Resolved in this order; a later field overrides an earlier one.
+    for (const key of ["main", "main1", "image"]) {
+        if (typeof box[key] !== "undefined") data.image = await resolveFileUrl(stripComments(box[key]).trim());
+    }
+
+    if (typeof box["cutie mark"] !== "undefined") {
+        // Use the file name following "[[File:" when present, otherwise the raw
+        // value; in both cases only the part before the first "|" is kept.
+        const parts = stripComments(box["cutie mark"]).trim().split("[[File:");
+        data.markimg = (parts.length > 1 ? parts[1] : parts[0]).split("|")[0];
+        data.mark = await resolveFileUrl(data.markimg);
+    }
+
+    if (typeof box.occupation !== "undefined") {
+        data.occupation = parseList(box.occupation);
+    }
+    if (typeof box.residence !== "undefined") {
+        // `replaceAll` is a String method: it has to be applied to each entry,
+        // not to the array (which threw a TypeError).
+        data.residence = parseList(box.residence).map((residence) => residence.replaceAll("Locations#", ""));
+    }
+
+    return data;
+}
+
+async function main() {
+    // Parsed once; previously the file was re-read and re-parsed for every title.
+    const boxes = JSON.parse(fs.readFileSync("./data/boxes.json").toString());
+
+    for (const [title, box] of Object.entries(boxes)) {
+        console.log("Parsing " + title + "...");
+        const data = await parseBox(title, box);
+
+        // Keep only boxes that define a sex and at least one other descriptive field.
+        const hasDetails = ["name2", "name3", "name4", "name5", "coat", "occupation", "residence"].some((key) => typeof box[key] !== "undefined");
+        if (hasDetails && typeof box.sex !== "undefined") ponies[title] = data;
+    }
+
+    // Written only after every box has been processed. This used to run outside
+    // the async function, i.e. as soon as the first `await` was reached.
+    fs.writeFileSync("./data/data.json", JSON.stringify(ponies, null, 4));
+}
+
+main().catch((e) => {
+    console.error(e);
+    process.exitCode = 1;
+});
\ No newline at end of file