about | summary | refs | log | tree | commit | diff
path: root/update
diff options
context:
space:
mode:
author Minteck <contact@minteck.org> 2022-01-08 21:47:45 +0100
committer Minteck <contact@minteck.org> 2022-01-08 21:47:45 +0100
commit be6c790e43be36cfa6669110c7064a67ad3c4623 (patch)
tree 3e81dd5a8caaf3f9c9db6b21f858167f01983119 /update
parent ac2dea4a23ca72001e41c1dbf66ff1a0d01e217c (diff)
download ponyfind-be6c790e43be36cfa6669110c7064a67ad3c4623.tar.gz
ponyfind-be6c790e43be36cfa6669110c7064a67ad3c4623.tar.bz2
ponyfind-be6c790e43be36cfa6669110c7064a67ad3c4623.zip
Update
Diffstat (limited to 'update')
-rw-r--r-- update/dict.js 32
-rw-r--r-- update/index.js 1
-rw-r--r-- update/infobox.js 19
-rw-r--r-- update/listgen.js 24
-rw-r--r-- update/pages.js 5
-rw-r--r-- update/parse.js 25
6 files changed, 83 insertions, 23 deletions
diff --git a/update/dict.js b/update/dict.js
index 24eb400..88ec568 100644
--- a/update/dict.js
+++ b/update/dict.js
@@ -7,22 +7,26 @@ let search = {
associations: []
}
+global.knownAssociations = [];
+
for (let page of JSON.parse(fs.readFileSync("./data/pages.json").toString())) {
- search.associations.push({
- title: page.query.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim(),
- endpoint: page.name
- });
- search.associations.push({
- title: page.name.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim(),
- endpoint: page.name
- });
+ if (!knownAssociations.includes(page.name.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim())) {
+ knownAssociations.push(page.name.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim());
+ search.associations.push({
+ title: page.name.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim(),
+ endpoint: page.name
+ });
+ }
if (typeof JSON.parse(fs.readFileSync("./data/data.json").toString())[page.name] !== "undefined") {
for (let nick of JSON.parse(fs.readFileSync("./data/data.json").toString())[page.name].names) {
- search.associations.push({
- title: nick.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim(),
- endpoint: page.name
- });
+ if (!knownAssociations.includes(nick.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim())) {
+ knownAssociations.push(nick.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim());
+ search.associations.push({
+ title: nick.toLowerCase().replace(/[^a-z]/gm, " ").replace(/\s\s+/g, " ").trim(),
+ endpoint: page.name
+ });
+ }
}
}
}
@@ -32,4 +36,6 @@ for (let association of search.associations) {
search.entries.push(association.title);
}
search.entries = [...new Set(search.entries)];
-fs.writeFileSync("./data/search.json", JSON.stringify(search, null, 4));
\ No newline at end of file
+fs.writeFileSync("./data/search.json", JSON.stringify(search, null, 4));
+
+console.log(JSON.parse(fs.readFileSync("./data/pages.json").toString()).length + " known characters");
\ No newline at end of file
diff --git a/update/index.js b/update/index.js
index 7121c2d..53acf34 100644
--- a/update/index.js
+++ b/update/index.js
@@ -1,3 +1,4 @@
+require('child_process').spawnSync("node", ["update/listgen.js"], { cwd: __dirname + "/..", stdio: "inherit" })
require('child_process').spawnSync("node", ["update/pages.js"], { cwd: __dirname + "/..", stdio: "inherit" })
require('child_process').spawnSync("node", ["update/infobox.js"], { cwd: __dirname + "/..", stdio: "inherit" })
require('child_process').spawnSync("node", ["update/parse.js"], { cwd: __dirname + "/..", stdio: "inherit" })
diff --git a/update/infobox.js b/update/infobox.js
index 26a989d..b9d2591 100644
--- a/update/infobox.js
+++ b/update/infobox.js
@@ -11,12 +11,31 @@ console.log("Gathering infobox for each page...");
console.log("Gathering infobox for '" + page.name + "'...");
try {
let data = (await axios.get("https://mlp.fandom.com/api.php?action=query&prop=revisions&titles=" + page.name + "&rvslots=*&rvprop=content&formatversion=2&format=json")).data;
+ let mwextracts = (await axios.get("https://mlp.fandom.com/api.php?format=json&action=query&prop=extracts&exlimit=max&explaintext&exintro&titles=" + encodeURI(page.name) + "&redirects=")).data;
+ let extracts = "";
+ try {
+ extracts = mwextracts.query.pages[Object.keys(mwextracts.query.pages)[0]].extract.replace(/(.*)\n\n(.*)/gm, "$2").replace(/([.?!])\s*(?=[A-Z])/g, "$1|").split("|")[0];
+ } catch (e) {
+ extracts = "";
+ }
+ let extracts_fr = extracts;
+ if (fs.existsSync("./modules/translate.php")) {
+ try {
+ extracts_fr = require('child_process').spawnSync("php", [ "translate.php", extracts ], { cwd: "./modules" }).stdout.toString()
+ } catch (e) {
+ extracts_fr = extracts;
+ }
+ } else {
+ extracts_fr = extracts;
+ }
if (data.query.pages.length > 0) {
console.log("Results found, adding name to database")
sections = parser.pageToSectionObject(data.query.pages[0].revisions[0].slots.main.content);
box = parser.parseInfoBox(sections["content"]);
if (box.template === "Infobox character") {
infoboxes[page.name] = parser.parseInfoBox(sections["content"]).values;
+ infoboxes[page.name]["_extract"] = extracts;
+ infoboxes[page.name]["_extract_fr"] = extracts_fr;
}
} else {
console.log("No results found, ignoring name");
diff --git a/update/listgen.js b/update/listgen.js
new file mode 100644
index 0000000..d9401f8
--- /dev/null
+++ b/update/listgen.js
@@ -0,0 +1,24 @@
+const axios = require("axios");
+const fs = require("fs");
+
+if (fs.existsSync("./data")) fs.rmSync("./data", { recursive: true });
+fs.mkdirSync("./data");
+
+(async () => {
+ async function getCategory(category) {
+ console.log("Category:" + category);
+ let cat = (await axios.get("https://mlp.fandom.com/api.php?action=query&generator=categorymembers&gcmtitle=Category:" + encodeURI(category) + "&prop=categories&cllimit=max&gcmlimit=max&format=json")).data;
+
+ return Object.keys(cat.query.pages).map(k => cat.query.pages[k].title).filter(k => !k.startsWith("List") && !k.includes("EG") && !k.toLowerCase().includes("ponies") && !k.includes(" and ") && !k.includes("(") && !k.includes("family"));
+ }
+
+ let list = [...new Set([
+ ...(await getCategory("Pegasus ponies")),
+ ...(await getCategory("Alicorn ponies")),
+ ...(await getCategory("Earth ponies")),
+ ...(await getCategory("Unicorn ponies")),
+ ...(await getCategory("Main characters")),
+ ...(await getCategory("Dragons")),
+ ])];
+ fs.writeFileSync("./data/list.json", JSON.stringify(list, null, 4))
+})()
\ No newline at end of file
diff --git a/update/pages.js b/update/pages.js
index 8449ca5..7f3a1c8 100644
--- a/update/pages.js
+++ b/update/pages.js
@@ -1,13 +1,10 @@
const fs = require('fs');
const axios = require('axios');
-if (fs.existsSync("./data")) fs.rmSync("./data", { recursive: true });
-fs.mkdirSync("./data");
-
(async () => {
console.log("Gathering pages list...");
let pages = [];
- for (let page of JSON.parse(fs.readFileSync("list.json").toString())) {
+ for (let page of JSON.parse(fs.readFileSync("./data/list.json").toString())) {
console.log("Searching for '" + page + "'...");
try {
let data = (await axios.get("https://mlp.fandom.com/api.php?action=query&list=search&srsearch=" + encodeURI(page) + "&srlimit=1&srenablerewrites=true&format=json")).data;
diff --git a/update/parse.js b/update/parse.js
index 570c3fa..6f4ee3a 100644
--- a/update/parse.js
+++ b/update/parse.js
@@ -1,4 +1,5 @@
const fs = require('fs');
+const axios = require('axios');
console.log("Parsing infobox data...");
@@ -6,9 +7,12 @@ let ponies = {};
(async () => {
for (let title in JSON.parse(fs.readFileSync("./data/boxes.json").toString())) {
+ console.log("Parsing " + title + "...");
let box = JSON.parse(fs.readFileSync("./data/boxes.json").toString())[title];
let data = {
names: [title],
+ extract: "",
+ extract_fr: "",
color: "000000",
image: "https://example.com",
kind: "Pony",
@@ -30,13 +34,22 @@ let ponies = {};
}
if (typeof box.kind !== "undefined") {
- kp = box.kind.replace(/[^a-zA-Z0-9-_ ]/gm, "").split(" ")[0];
+ kp = box.kind.replace(/<!--[\s\S]*?-->/g, "").replace(/[^a-zA-Z0-9-_ ]/gm, "").split(" ")[0];
data.kind = kp.substr(kp.replace(/([A-Z])([a-z0-9]*)$/g, "").length);
}
if (typeof box.sex !== "undefined") data.sex = box.sex.startsWith("F") ? "F" : "M";
+ if (typeof box._extract !== "undefined") {
+ data.extract = box._extract;
+ }
+ if (typeof box._extract_fr !== "undefined") {
+ data.extract_fr = box._extract_fr.replace(/ma petite amitié de poney est la magie/gmi, "My Little Pony Friendship is Magic").replace(/Mon amitié avec mon petit poney est magique/gmi, "My Little Pony Friendship is Magic").replace(/Mon petit Poney/gmi, "My Little Pony").replace(/Mon Petit Poney Une Nouvelle Génération/gmi, "My Little Pony A New Generation").replace(/Mon petit poney : une nouvelle génération/gmi, "My Little Pony: A New Generation").replace(/pegasus/gmi, "pégase").replace(/alicorn[^e]/gmi, "alicorne").replace(/une poney/gmi, "une ponette").replace(/petite génération de poney 5/gmi, "Génération 5");
+ } else {
+ data.extract_fr = data.extract;
+ }
if (typeof box.coat !== "undefined") data.color = box.coat.replace(/\[(.*)\/(.{6})\/ (.*)\]/gm, "$2").replace(/{{perbang\|([0-9A-Fa-f].{5})(.*)/g, "$1");
if (typeof box.main !== "undefined") data.image = "https://mlp.fandom.com/Special:FilePath/" + encodeURI(box.main);
if (typeof box.main1 !== "undefined") data.image = "https://mlp.fandom.com/Special:FilePath/" + encodeURI(box.main1);
+ if (typeof box.image !== "undefined") data.image = "https://mlp.fandom.com/Special:FilePath/" + encodeURI(box.image);
if (typeof box["cutie mark"] !== "undefined") {
try {
data.markimg = box["cutie mark"].split("[[File:")[1].split("|")[0];
@@ -48,20 +61,20 @@ let ponies = {};
if (typeof box.occupation !== "undefined") {
occupations = [];
- box.occupation.replace(/\((.*)\)/gm, "").replace(/\[\[(.*)_(.*)\]\]|\[\[(.*)\|(.*)\]\]|\[\[(.*)\]\]/gm, "$2$4$5").replace(/\|/gm, "_").replace(/<( ||(|| )\/)( ||(|| )\/)(b|B)(r|R)( ||(|| )\/)( ||(|| )\/)>/gm, "|").replace(/( \|| \| | \| )/gm, "|").split("|").forEach((e) => {
- occupations.push(e.trim().replace(/[\[\]]/gm, "").replace(/<(.*)>/gm, ""));
+ box.occupation/*.replace(/\[\[(.*)_(.*)\]\]|\[\[(.*)\|(.*)\]\]|\[\[(.*)\]\]/gm, "$2$4$5")*/.replace(/\|/gm, "_").replace(/<( ||(|| )\/)( ||(|| )\/)(b|B)(r|R)( ||(|| )\/)( ||(|| )\/)>/gm, "|").replace(/( \|| \| | \| )/gm, "|").split("|").forEach((e) => {
+ occupations.push(e.trim().replace(/[\[\]]/gm, "").replace(/<(.*)>/gm, "").replace(/([a-zA-Z0-9 \-_,.'"]*)_([a-zA-Z0-9 \-_,.'"]*)/gm, "$2"));
})
data.occupation = occupations;
}
if (typeof box.residence !== "undefined") {
residences = [];
- box.residence.replace(/\((.*)\)/gm, "").replace(/\[\[(.*)_(.*)\]\]|\[\[(.*)\|(.*)\]\]|\[\[(.*)\]\]/gm, "$2$4$5").replace(/\|/gm, "_").replace(/<( ||(|| )\/)( ||(|| )\/)(b|B)(r|R)( ||(|| )\/)( ||(|| )\/)>/gm, "|").replace(/( \|| \| | \| )/gm, "|").split("|").forEach((e) => {
- residences.push(e.trim().replace(/[\[\]]/gm, "").replace(/<(.*)>/gm, ""));
+ box.residence/*.replace(/\[\[(.*)_(.*)\]\]|\[\[(.*)\|(.*)\]\]|\[\[(.*)\]\]/gm, "$2$4$5")*/.replace(/\|/gm, "_").replace(/<( ||(|| )\/)( ||(|| )\/)(b|B)(r|R)( ||(|| )\/)( ||(|| )\/)>/gm, "|").replace(/( \|| \| | \| )/gm, "|").split("|").forEach((e) => {
+ residences.push(e.trim().replace(/[\[\]]/gm, "").replace(/<(.*)>/gm, "").replace(/([a-zA-Z0-9 \-_,.'"]*)_([a-zA-Z0-9 \-_,.'"]*)/gm, "$2"));
})
data.residence = residences;
}
- ponies[title] = data;
+ if (typeof box.name2 !== "undefined" || typeof box.name3 !== "undefined" || typeof box.name4 !== "undefined" || typeof box.name5 !== "undefined" || typeof box.coat !== "undefined" || typeof box.occupation !== "undefined" || typeof box.residence !== "undefined") ponies[title] = data;
}
})()