aboutsummaryrefslogtreecommitdiff
path: root/e621
diff options
context:
space:
mode:
Diffstat (limited to 'e621')
-rwxr-xr-x e621/categories.png bin 0 -> 24434 bytes
-rwxr-xr-x e621/csv2json.js 34
-rwxr-xr-x e621/match.js 91
-rwxr-xr-x e621/package-lock.json 47
-rwxr-xr-x e621/package.json 6
-rwxr-xr-x e621/parse.js 30
-rwxr-xr-x e621/update.js 189
7 files changed, 397 insertions, 0 deletions
diff --git a/e621/categories.png b/e621/categories.png
new file mode 100755
index 0000000..da09fde
--- /dev/null
+++ b/e621/categories.png
Binary files differ
diff --git a/e621/csv2json.js b/e621/csv2json.js
new file mode 100755
index 0000000..ee55fdb
--- /dev/null
+++ b/e621/csv2json.js
@@ -0,0 +1,34 @@
+const { parse } = require("csv-parse");
+const fs = require('fs');
+
/**
 * Streams a CSV file through csv-parse and writes the converted rows to a JSON file.
 *
 * The first line of the CSV is skipped (`from_line: 2`), every remaining row is
 * passed to `toRecord`, and the collected records are written in one go when
 * the parser reports the end of its input.
 *
 * @param {string} source - Path of the CSV file to read.
 * @param {string} destination - Path of the JSON file to write.
 * @param {(row: string[]) => object} toRecord - Maps one parsed row to the object stored in the output array.
 */
function convertCsv(source, destination, toRecord) {
    const records = [];

    // pipe() does not forward errors from the reader to the parser, so both
    // streams get the same handler. On failure nothing is written.
    const fail = (err) => {
        console.error("Failed to convert " + source + ": " + err.message);
        process.exitCode = 1;
    };

    fs.createReadStream(source)
        .on("error", fail)
        .pipe(parse({ delimiter: ",", from_line: 2 }))
        .on("error", fail)
        .on("data", (row) => {
            records.push(toRecord(row));
        })
        .on("end", () => {
            fs.writeFileSync(destination, JSON.stringify(records));
        });
}

// Columns are read by position: id, name, category, post count.
convertCsv("./tags.csv", "./tags.json", ([id, name, category, postCount]) => ({
    id: Number.parseInt(id, 10),
    name,
    category: Number.parseInt(category, 10),
    postCount: Number.parseInt(postCount, 10),
}));

// Columns are read by position: id, antecedent, consequent, creation date, status.
// NOTE(review): `new Date(...)` on a timestamp without a time zone is parsed as
// local time — confirm the format used in tag_aliases.csv.
convertCsv("./tag_aliases.csv", "./tag_aliases.json", ([id, antecedentName, consequentName, createdAt, status]) => ({
    id: Number.parseInt(id, 10),
    antecedentName,
    consequentName,
    createdAt: new Date(createdAt),
    status,
}));
diff --git a/e621/match.js b/e621/match.js
new file mode 100755
index 0000000..f2272e7
--- /dev/null
+++ b/e621/match.js
@@ -0,0 +1,91 @@
+const PATH = require('fs').readFileSync(require('os').homedir() + "/.prisbeam_path").toString().trim();
+
+const sqlite3 = require('sqlite3').verbose();
+const db = new sqlite3.Database(PATH, sqlite3.OPEN_READONLY);
+const tags = require('./parsed_tags.json');
+
+const fs = require('fs');
+
+db.serialize(async () => {
/**
 * Runs a statement with `db.all` and exposes the callback result as a promise.
 *
 * @param {string} q - SQL text to execute.
 * @param {Array} [params=[]] - Values bound to the `?` placeholders in `q`;
 *   omit it to run `q` exactly as written.
 * @returns {Promise<Array>} Resolves with the rows handed to the callback,
 *   rejects with the error reported by the driver.
 */
function query(q, params = []) {
    return new Promise((resolve, reject) => {
        db.all(q, params, (err, rows) => {
            if (err) {
                reject(err);
                return;
            }

            resolve(rows);
        });
    });
}
+
/**
 * Renders a value as an SQL string literal.
 *
 * @param {*} str - Value to embed; `null` and `undefined` become the bare
 *   keyword `NULL`, anything else is converted with `String()`.
 * @returns {string} The single-quoted literal, with every embedded single
 *   quote doubled.
 */
function sqlstr(str) {
    // `== null` deliberately matches both null and undefined.
    if (str == null) {
        return "NULL";
    }

    return "'" + String(str).replaceAll("'", "''") + "'";
}
+
// Running totals: `i` counts processed tags and `success` the ones that got a
// match; `total` and `covered` sum `usage` over all processed tags and over
// the matched ones respectively.
let i = 0;
let success = 0;
let total = 0;
let covered = 0;
// One counter per category, indexed by `tag.category`.
const successPerCategory = [0, 0, 0, 0, 0, 0, 0, 0, 0];
const tagsList = Object.entries(tags);

let done = 0;
let coverage = 0;
let matched = 0;

// `columns`, `cursorTo` and `clearLine` only exist when stdout is a terminal.
const interactive = Boolean(process.stdout.isTTY);

for (const [name, tag] of tagsList) {
    // Try the tag's own name first, once as a display name (underscores
    // become spaces) and once as a slug (underscores become pluses).
    const matches = await query("SELECT * FROM tags WHERE name = " + sqlstr(name.replaceAll("_", " ")) + " OR slug = " + sqlstr(name.replaceAll("_", "+")));
    let match = matches.length > 0 ? matches[0] : null;

    // Only fall back to the allowed names when the direct lookup found nothing;
    // the direct match always wins, so the second query would be wasted otherwise.
    if (match === null) {
        const condition = tag.allowedNames
            .slice(0, 100)
            .flatMap((allowed) => [allowed.replaceAll("_", " "), allowed.replaceAll("_", "+")])
            .map((candidate) => "name = " + sqlstr(candidate) + " OR slug = " + sqlstr(candidate))
            .join(" OR ");

        const matchesAlias = await query("SELECT * FROM tags WHERE " + condition);

        if (matchesAlias.length > 0) {
            match = matchesAlias[0];
        }
    }

    i++;
    total += tag.usage;

    let current;

    if (match) {
        // The first two characters of the stored id are dropped before parsing.
        tag.derpibooruMatch = [Number.parseInt(match.id.toString().substring(2), 10), match.name];
        current = tag.id + " -> " + tag.derpibooruMatch;
        success++;
        successPerCategory[tag.category]++;
        covered += tag.usage;
    } else {
        current = tag.id + " -> ???";
    }

    // Computed after the counters are updated so the figures (and the final
    // report below) include the tag that was just processed.
    done = (i / tagsList.length) * 100;
    coverage = total > 0 ? (covered / total) * 100 : 0;
    matched = (success / i) * 100;

    if (interactive) {
        const width = process.stdout.columns - 1;
        const txt = done.toFixed(2) + "% done - " + coverage.toFixed(2) + "% coverage - " + matched.toFixed(2) + "% matched (" + successPerCategory.map(j => ((j / i) * 100).toFixed(1) + "%").join(", ") + ") - Current: " + current;

        // Pad to the full width so a shorter line fully overwrites the previous one.
        process.stdout.write(txt.substring(0, width).padEnd(width, " "));
        process.stdout.cursorTo(0);
    }
}

if (interactive) {
    process.stdout.clearLine(0);
    process.stdout.write("Saving to disk...");
}

fs.writeFileSync("parsed_tags.json", JSON.stringify(tags, null, 2));

if (interactive) {
    process.stdout.cursorTo(0);
}

console.log("Matching operation completed.");
console.log("Here is a breakdown:");
console.log(" * " + success + " tags out of " + i + " (" + matched.toFixed(3) + "%) could be matched successfully.");
console.log(successPerCategory.map((j, k) => " * " + ((j / i) * 100).toFixed(3) + "% from category " + k).join("\n"));
console.log(" * This means that the matched tags cover " + coverage.toFixed(3) + "% of posts on e621, roughly 1 in " + (100 / coverage).toFixed(1) + ".");
+});
diff --git a/e621/package-lock.json b/e621/package-lock.json
new file mode 100755
index 0000000..556abc0
--- /dev/null
+++ b/e621/package-lock.json
@@ -0,0 +1,47 @@
+{
+ "name": "e621",
+ "lockfileVersion": 3,
+ "requires": true,
+ "packages": {
+ "": {
+ "dependencies": {
+ "csv": "^6.3.8",
+ "csv-parse": "^5.5.5"
+ }
+ },
+ "node_modules/csv": {
+ "version": "6.3.8",
+ "resolved": "https://registry.npmjs.org/csv/-/csv-6.3.8.tgz",
+ "integrity": "sha512-gRh3yiT9bHBA5ka2yOpyFqAVu/ZpwWzajMUR/es0ljevAE88WyHBuMUy7jzd2o5j6LYQesEO/AyhbQ9BhbDXUA==",
+ "dependencies": {
+ "csv-generate": "^4.4.0",
+ "csv-parse": "^5.5.5",
+ "csv-stringify": "^6.4.6",
+ "stream-transform": "^3.3.1"
+ },
+ "engines": {
+ "node": ">= 0.1.90"
+ }
+ },
+ "node_modules/csv-generate": {
+ "version": "4.4.0",
+ "resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-4.4.0.tgz",
+ "integrity": "sha512-geM01acNPZ0wr4/9sKev5fCzFG/tsc/NbuFWrhLc47M1zQyUdEJH65+cxTLIVafEwhBjIYwQ7fdOL9roBqVltQ=="
+ },
+ "node_modules/csv-parse": {
+ "version": "5.5.5",
+ "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.5.tgz",
+ "integrity": "sha512-erCk7tyU3yLWAhk6wvKxnyPtftuy/6Ak622gOO7BCJ05+TYffnPCJF905wmOQm+BpkX54OdAl8pveJwUdpnCXQ=="
+ },
+ "node_modules/csv-stringify": {
+ "version": "6.4.6",
+ "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-6.4.6.tgz",
+ "integrity": "sha512-h2V2XZ3uOTLilF5dPIptgUfN/o2ia/80Ie0Lly18LAnw5s8Eb7kt8rfxSUy24AztJZas9f6DPZpVlzDUtFt/ag=="
+ },
+ "node_modules/stream-transform": {
+ "version": "3.3.1",
+ "resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-3.3.1.tgz",
+ "integrity": "sha512-BL8pv9QL8Ikd11oZwlRDp1qYMhGR0i50zI9ltoijKGc4ubQWal/Rc4p6SYJp1TBOGpE0uAGchwbxOZ1ycwTuqQ=="
+ }
+ }
+}
diff --git a/e621/package.json b/e621/package.json
new file mode 100755
index 0000000..d0c2ada
--- /dev/null
+++ b/e621/package.json
@@ -0,0 +1,6 @@
+{
+ "dependencies": {
+ "csv": "^6.3.8",
+ "csv-parse": "^5.5.5"
+ }
+}
diff --git a/e621/parse.js b/e621/parse.js
new file mode 100755
index 0000000..b848708
--- /dev/null
+++ b/e621/parse.js
@@ -0,0 +1,30 @@
+const fs = require('fs');
+
const tags = require('./tags.json');
const aliases = require('./tag_aliases.json');

// Tag names are arbitrary strings, so the dictionary has no prototype: a name
// such as "__proto__" or "constructor" is stored and looked up like any other
// key instead of touching Object.prototype.
const final = Object.create(null);

for (const tag of tags) {
    final[tag.name] = {
        id: tag.id,
        allowedNames: [tag.name],
        usage: tag.postCount,
        category: tag.category,
        derpibooruMatch: null,
    };
}

// First alias whose target tag is missing from tags.json, if any.
let orphanAlias = null;

for (const alias of aliases) {
    if (alias.status === "deleted" || alias.status === "pending") continue;

    const aliasedTag = final[alias.consequentName];

    if (aliasedTag === undefined) {
        orphanAlias = alias;
        break;
    }

    aliasedTag.allowedNames.push(alias.antecedentName);
}

if (orphanAlias === null) {
    fs.writeFileSync("parsed_tags.json", JSON.stringify(final, null, 2));
} else {
    // Nothing is written when an alias cannot be resolved; report it and make
    // the failure visible through the exit status.
    console.error(new Error("Alias points to unknown tag: " + orphanAlias.consequentName));
    console.log(orphanAlias);
    process.exitCode = 1;
}
diff --git a/e621/update.js b/e621/update.js
new file mode 100755
index 0000000..7c44c73
--- /dev/null
+++ b/e621/update.js
@@ -0,0 +1,189 @@
+const API_ID = require('./credentials.json').id;
+const API_KEY = require('./credentials.json').key;
+const TOKEN = API_ID + ":" + API_KEY;
+
+const sqlite3 = require('sqlite3').verbose();
+const fs = require('fs');
+const tags = require('./parsed_tags.json');
+
+if (!process.argv[2]) {
+ console.log("Missing operand: please pass the origin database");
+ return;
+}
+
+console.log("Creating copy...");
+fs.copyFileSync(process.argv[2], process.argv[2] + ".work.db");
+
+console.log("Opening database...");
+const db = new sqlite3.Database(process.argv[2] + ".work.db");
+
+db.serialize(async () => {
/**
 * Runs a statement with `db.all` and exposes the callback result as a promise.
 *
 * @param {string} q - SQL text to execute.
 * @param {Array} [params=[]] - Values bound to the `?` placeholders in `q`;
 *   omit it to run `q` exactly as written.
 * @returns {Promise<Array>} Resolves with the rows handed to the callback,
 *   rejects with the error reported by the driver.
 */
function sql(q, params = []) {
    return new Promise((resolve, reject) => {
        db.all(q, params, (err, rows) => {
            if (err) {
                reject(err);
                return;
            }

            resolve(rows);
        });
    });
}
+
+ const list = [];
+ let page = 1;
+ let lastList = [null];
+
/**
 * Returns a promise that settles once a timer of `ms` milliseconds has fired.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves without a value.
 */
function sleep(ms) {
    const startTimer = (wake) => {
        setTimeout(wake, ms);
    };

    return new Promise(startTimer);
}
+
process.stdout.write("Gathering items...");

// The credentials do not change between pages, so encode them once.
const authorization = "Basic " + btoa(TOKEN);

// Walk the paginated listing until a page comes back empty. `list`, `page`
// and `lastList` are the accumulators declared just above.
while (lastList.length > 0) {
    const res = await fetch("https://e621.net/posts.json?page=" + page + "&limit=320&tags=" + encodeURIComponent("fav:" + API_ID), {
        headers: {
            "Authorization": authorization,
            "User-Agent": "Mozilla/5.0 (+Prisbeam; https://source.equestria.dev/equestria.dev/prisbeam)"
        }
    });

    // Fail with the HTTP status instead of a confusing error further down
    // when the server refuses the request.
    if (!res.ok) {
        throw new Error("Request for page " + page + " failed with HTTP " + res.status);
    }

    const data = (await res.json())["posts"];

    if (!Array.isArray(data)) {
        throw new Error("Response for page " + page + " does not contain a \"posts\" array");
    }

    list.push(...data);
    lastList = data;
    page++;

    // clearLine()/cursorTo() only exist when stdout is a terminal.
    if (process.stdout.isTTY) {
        process.stdout.clearLine(0);
        process.stdout.cursorTo(0);
        process.stdout.write("Gathering items... " + list.length);
    }

    // Pause between requests; there is no next request after an empty page.
    if (data.length > 0) {
        await sleep(1000);
    }
}
+
/**
 * Maps a file extension to the MIME type stored with the image record.
 *
 * @param {string} ext - Extension without the leading dot; letter case is ignored.
 * @returns {string} The matching MIME type, or "application/octet-stream"
 *   when the extension is not in the table, so the field never ends up
 *   undefined (and therefore missing from the serialized record).
 */
function extToMime(ext) {
    const mimeTypes = new Map([
        ["jpg", "image/jpeg"],
        ["jpe", "image/jpeg"],
        ["jpeg", "image/jpeg"],
        ["jfif", "image/jpeg"],
        ["jif", "image/jpeg"],
        ["jfi", "image/jpeg"],
        ["png", "image/png"],
        ["gif", "image/gif"],
        ["swf", "application/x-shockwave-flash"],
        ["webm", "video/webm"],
    ]);

    return mimeTypes.get(String(ext).toLowerCase()) ?? "application/octet-stream";
}
+
/**
 * Translates a one-letter rating into the corresponding rating tag.
 *
 * @param {string} rating - "s", "q" or "e".
 * @param {*} [text] - When truthy the tag name is returned, otherwise its numeric id.
 * @returns {string|number|undefined} The tag name or id; undefined for any other rating.
 */
function getRatingTag(rating, text) {
    // rating letter -> [tag name, tag id]
    const ratingTags = new Map([
        ["s", ["safe", 1040482]],
        ["q", ["suggestive", 1043502]],
        ["e", ["explicit", 1026707]],
    ]);

    const entry = ratingTags.get(rating);

    if (entry === undefined) {
        return undefined;
    }

    const [label, tagId] = entry;
    return text ? label : tagId;
}
+
process.stdout.write("\nProcessing images...");
let i = 0;

// Turn every gathered post into one image record and insert it, one at a time.
for (const image of list) {
    const file = image["file"];
    const previewUrl = image["preview"]["url"];
    // Use the sample when the post has one, otherwise fall back to the preview.
    const thumbUrl = image["sample"]["has"] ? image["sample"]["url"] : previewUrl;

    // Flatten the per-category tag lists once and keep only the tags that
    // have a recorded match; each entry is the [id, name] pair of that match.
    const matchedTags = Object.values(image["tags"])
        .flat()
        .filter((tagName) => tags[tagName] && tags[tagName]["derpibooruMatch"])
        .map((tagName) => tags[tagName]["derpibooruMatch"]);

    const data = {
        wilson_score: 0,
        spoilered: false,
        representations: {
            full: file["url"],
            large: file["url"],
            medium: previewUrl,
            small: previewUrl,
            tall: previewUrl,
            thumb: thumbUrl,
            thumb_small: thumbUrl,
            thumb_tiny: thumbUrl,
        },
        faves: 0,
        aspect_ratio: file["width"] / file["height"],
        duration: image["duration"] ?? 0,
        thumbnails_generated: true,
        tags: [
            getRatingTag(image["rating"], true),
            ...matchedTags.map((matchedTag) => matchedTag[1])
        ],
        created_at: image["created_at"],
        tag_count: 0,
        downvotes: 0,
        // Record ids are the post id with a "20" prefix.
        id: Number.parseInt("20" + image["id"], 10),
        source_id: image["id"],
        source: "https://e621.net/posts/%s",
        source_name: "e621",
        name: file["md5"] + "." + file["ext"],
        width: file["width"],
        intensities: {
            ne: 0,
            nw: 0,
            se: 0,
            sw: 0
        },
        orig_sha512_hash: file["md5"],
        deletion_reason: null,
        processed: true,
        animated: null,
        height: file["height"],
        description: image["description"],
        sha512_hash: file["md5"],
        source_urls: image["sources"],
        upvotes: 0,
        source_url: image["sources"][0] ?? null,
        uploader_id: image["uploader_id"],
        score: 0,
        uploader: null,
        first_seen_at: image["created_at"],
        mime_type: extToMime(file["ext"]),
        duplicate_of: null,
        size: file["size"],
        comment_count: 0,
        view_url: file["url"],
        hidden_from_users: false,
        updated_at: image["updated_at"],
        tag_ids: [
            getRatingTag(image["rating"]),
            // Matched tag ids are stored with a "10" prefix.
            ...matchedTags.map((matchedTag) => Number.parseInt("10" + matchedTag[0], 10))
        ],
        format: file["ext"]
    };

    // The record is stored as base64-encoded JSON. Base64 output contains no
    // quote characters, so it can sit inside a single-quoted literal as is;
    // single quotes are the standard SQL string delimiter, whereas double
    // quotes denote identifiers and only work as strings through a SQLite
    // compatibility fallback.
    const readyData = Buffer.from(JSON.stringify(data)).toString("base64");
    await sql("INSERT INTO images VALUES ('" + readyData + "')");

    i++;

    // clearLine()/cursorTo() only exist when stdout is a terminal.
    if (process.stdout.isTTY) {
        process.stdout.clearLine(0);
        process.stdout.cursorTo(0);
        process.stdout.write("Processing images... " + i);
    }
}
+
console.log("\nClosing database...");

db.close((err) => {
    // Only replace the origin database when the working copy was closed
    // cleanly; otherwise keep both files and report the failure.
    if (err) {
        console.error("Could not close the working database; the origin database was left untouched.");
        console.error(err);
        process.exitCode = 1;
        return;
    }

    fs.copyFileSync(process.argv[2] + ".work.db", process.argv[2]);
    fs.unlinkSync(process.argv[2] + ".work.db");
    console.log("Done!");
});
+});