From e44e2fe070484e06d384a31ef2699c3a2d5d474e Mon Sep 17 00:00:00 2001
From: RaindropsSys
Date: Thu, 13 Jun 2024 15:46:03 +0200
Subject: GitHub migration

---
 e621/categories.png    | Bin 0 -> 24434 bytes
 e621/csv2json.js       |  48 +++++++++
 e621/match.js          | 130 ++++++++++++++++++++++++
 e621/package-lock.json |  47 +++++++++
 e621/package.json      |   6 +
 e621/parse.js          |  39 +++++++
 e621/update.js         | 269 +++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 539 insertions(+)
 create mode 100755 e621/categories.png
 create mode 100755 e621/csv2json.js
 create mode 100755 e621/match.js
 create mode 100755 e621/package-lock.json
 create mode 100755 e621/package.json
 create mode 100755 e621/parse.js
 create mode 100755 e621/update.js

(limited to 'e621')

diff --git a/e621/categories.png b/e621/categories.png
new file mode 100755
index 0000000..da09fde
Binary files /dev/null and b/e621/categories.png differ
diff --git a/e621/csv2json.js b/e621/csv2json.js
new file mode 100755
index 0000000..ee55fdb
--- /dev/null
+++ b/e621/csv2json.js
@@ -0,0 +1,48 @@
+// Converts the e621 database exports (tags.csv and tag_aliases.csv, read from
+// the current working directory) into the JSON files required by parse.js.
+const { parse } = require("csv-parse");
+const fs = require('fs');
+
+/**
+ * Streams a CSV file, maps every data row to an object and writes the
+ * resulting array to a JSON file once the whole input has been read.
+ *
+ * @param {string} input - Path of the CSV file; its first line is skipped.
+ * @param {string} output - Path of the JSON file to write.
+ * @param {function} mapRow - Receives one parsed row (an array of column values) and returns the object to store.
+ */
+function convert(input, output, mapRow) {
+    const rows = [];
+
+    // Name the offending file instead of crashing on an unhandled 'error' event.
+    const fail = (err) => {
+        console.error("Failed to convert " + input + ":", err);
+        process.exitCode = 1;
+    };
+
+    fs.createReadStream(input)
+        .on("error", fail)
+        .pipe(parse({ delimiter: ",", from_line: 2 }))
+        .on("error", fail)
+        .on("data", (row) => {
+            rows.push(mapRow(row));
+        })
+        .on("end", () => {
+            fs.writeFileSync(output, JSON.stringify(rows));
+        });
+}
+
+convert("./tags.csv", "./tags.json", (row) => ({
+    id: Number.parseInt(row[0], 10),
+    name: row[1],
+    category: Number.parseInt(row[2], 10),
+    postCount: Number.parseInt(row[3], 10)
+}));
+
+convert("./tag_aliases.csv", "./tag_aliases.json", (row) => ({
+    id: Number.parseInt(row[0], 10),
+    antecedentName: row[1],
+    consequentName: row[2],
+    createdAt: new Date(row[3]),
+    status: row[4]
+}));
diff --git a/e621/match.js b/e621/match.js
new file mode 100755
index 0000000..f2272e7
--- /dev/null
+++ b/e621/match.js
@@ -0,0 +1,130 @@
+// Matches every e621 tag of parsed_tags.json against the "tags" table of the
+// SQLite database whose path is stored in ~/.prisbeam_path, then writes the
+// matches back to parsed_tags.json (field "derpibooruMatch").
+const fs = require('fs');
+const os = require('os');
+
+const PATH = fs.readFileSync(os.homedir() + "/.prisbeam_path").toString().trim();
+
+const sqlite3 = require('sqlite3').verbose();
+const db = new sqlite3.Database(PATH, sqlite3.OPEN_READONLY);
+const tags = require('./parsed_tags.json');
+
+/**
+ * Runs a query against the database.
+ *
+ * @param {string} q - SQL text, with "?" placeholders for values.
+ * @param {Array} [params] - Values bound to the placeholders, in order.
+ * @returns {Promise} Resolves with every row returned by the query.
+ */
+function query(q, params = []) {
+    return new Promise((res, rej) => {
+        db.all(q, params, function (err, data) {
+            if (err) {
+                rej(err);
+            } else {
+                res(data);
+            }
+        });
+    });
+}
+
+/**
+ * Finds the database tag for an e621 tag: the tag's own name is tried first,
+ * then its allowed names (the name plus its aliases, see parse.js).
+ *
+ * @param {string} name - e621 tag name.
+ * @param {string[]} allowedNames - Names accepted for this tag.
+ * @returns {Promise} Resolves with the first matching row, or null when there is none.
+ */
+async function findMatch(name, allowedNames) {
+    // Underscores are replaced by spaces for "name" and by plus signs for "slug".
+    const direct = await query("SELECT * FROM tags WHERE name = ? OR slug = ?", [name.replaceAll("_", " "), name.replaceAll("_", "+")]);
+    if (direct.length > 0) return direct[0];
+
+    // The alias lookup is only needed (and only run) when the direct one found nothing.
+    // At most 100 names are used; each is compared, in both spellings, to both columns.
+    const variants = allowedNames.slice(0, 100).flatMap(i => [i.replaceAll("_", " "), i.replaceAll("_", "+")]);
+    const condition = variants.map(() => "name = ? OR slug = ?").join(" OR ");
+    const aliased = await query("SELECT * FROM tags WHERE " + condition, variants.flatMap(i => [i, i]));
+
+    return aliased.length > 0 ? aliased[0] : null;
+}
+
+/**
+ * Prints a progress line that overwrites itself. Does nothing when stdout is
+ * not a terminal, where "columns" and "cursorTo" are unavailable.
+ *
+ * @param {string} txt - Text to show; truncated to the terminal width.
+ */
+function showProgress(txt) {
+    if (!process.stdout.isTTY) return;
+
+    const width = process.stdout.columns - 1;
+    const line = txt.substring(0, width);
+    process.stdout.write(line + " ".repeat(width - line.length));
+    process.stdout.cursorTo(0);
+}
+
+/**
+ * Matches all tags, saves the result and prints a summary.
+ */
+async function main() {
+    const tagsList = Object.entries(tags);
+    // Number of matched tags per category; tag.category is used as the index.
+    const successPerCategory = [0, 0, null, 0, 0, 0, 0, 0, 0];
+
+    let i = 0;
+    let success = 0;
+    let total = 0;
+    let covered = 0;
+    let coverage = 0;
+    let matched = 0;
+
+    for (const [name, tag] of tagsList) {
+        const match = await findMatch(name, tag.allowedNames);
+
+        i++;
+        total += tag.usage;
+
+        if (match) {
+            // The first two characters of the database id are dropped (update.js adds "10" back).
+            tag.derpibooruMatch = [Number.parseInt(match.id.toString().substring(2), 10), match.name];
+            success++;
+            successPerCategory[tag.category]++;
+            covered += tag.usage;
+        }
+
+        // Computed after the counters are updated, so that the figures (and the
+        // summary printed at the end) include the tag that was just processed.
+        const done = (i / tagsList.length) * 100;
+        coverage = (covered / total) * 100;
+        matched = (success / i) * 100;
+
+        showProgress(done.toFixed(2) + "% done - " + coverage.toFixed(2) + "% coverage - " + matched.toFixed(2) + "% matched (" + successPerCategory.map(j => ((j / i) * 100).toFixed(1) + "%").join(", ") + ") - Current: " + tag.id + " -> " + (match ? tag.derpibooruMatch : "???"));
+    }
+
+    if (process.stdout.isTTY) process.stdout.clearLine(0);
+    process.stdout.write("Saving to disk...");
+
+    fs.writeFileSync("parsed_tags.json", JSON.stringify(tags, null, 2));
+
+    if (process.stdout.isTTY) {
+        process.stdout.cursorTo(0);
+    } else {
+        process.stdout.write("\n");
+    }
+
+    console.log("Matching operation completed.");
+    console.log("Here is a breakdown:");
+    console.log(" * " + success + " tags out of " + i + " (" + matched.toFixed(3) + "%) could be matched successfully.");
+    console.log(successPerCategory.map((j, k) => " * " + ((j / i) * 100).toFixed(3) + "% from category " + k).join("\n"));
+    console.log(" * This means that the matched tags cover " + coverage.toFixed(3) + "% of posts on e621, roughly 1 in " + (100 / coverage).toFixed(1) + ".");
+}
+
+main()
+    .catch((err) => {
+        console.error(err);
+        process.exitCode = 1;
+    })
+    .finally(() => db.close());
diff --git a/e621/package-lock.json b/e621/package-lock.json
new file mode 100755
index 0000000..556abc0
--- /dev/null
+++ b/e621/package-lock.json
@@ -0,0 +1,47 @@
+{
+  "name": "e621",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "dependencies": {
+        "csv": "^6.3.8",
+        "csv-parse": "^5.5.5"
+      }
+    },
+    "node_modules/csv": {
+      "version": "6.3.8",
+      "resolved": "https://registry.npmjs.org/csv/-/csv-6.3.8.tgz",
+      "integrity": "sha512-gRh3yiT9bHBA5ka2yOpyFqAVu/ZpwWzajMUR/es0ljevAE88WyHBuMUy7jzd2o5j6LYQesEO/AyhbQ9BhbDXUA==",
+      "dependencies": {
+        "csv-generate": "^4.4.0",
+        "csv-parse": "^5.5.5",
+        "csv-stringify": "^6.4.6",
+        "stream-transform": "^3.3.1"
+      },
+      "engines": {
+        "node": ">= 0.1.90"
+      }
+    },
+    "node_modules/csv-generate": {
+      "version": "4.4.0",
+      "resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-4.4.0.tgz",
+      "integrity": "sha512-geM01acNPZ0wr4/9sKev5fCzFG/tsc/NbuFWrhLc47M1zQyUdEJH65+cxTLIVafEwhBjIYwQ7fdOL9roBqVltQ=="
+    },
+    "node_modules/csv-parse": {
+      "version": "5.5.5",
+      "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.5.tgz",
+      "integrity": "sha512-erCk7tyU3yLWAhk6wvKxnyPtftuy/6Ak622gOO7BCJ05+TYffnPCJF905wmOQm+BpkX54OdAl8pveJwUdpnCXQ=="
+    },
+    "node_modules/csv-stringify": {
+      "version": "6.4.6",
+      "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-6.4.6.tgz",
+      "integrity": "sha512-h2V2XZ3uOTLilF5dPIptgUfN/o2ia/80Ie0Lly18LAnw5s8Eb7kt8rfxSUy24AztJZas9f6DPZpVlzDUtFt/ag=="
+    },
+    "node_modules/stream-transform": {
+      "version": "3.3.1",
+      "resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-3.3.1.tgz",
+      "integrity": "sha512-BL8pv9QL8Ikd11oZwlRDp1qYMhGR0i50zI9ltoijKGc4ubQWal/Rc4p6SYJp1TBOGpE0uAGchwbxOZ1ycwTuqQ=="
+    }
+  }
+}
diff --git a/e621/package.json b/e621/package.json
new file mode 100755
index 0000000..d0c2ada
--- /dev/null
+++ b/e621/package.json
@@ -0,0 +1,6 @@
+{
+  "dependencies": {
+    "csv": "^6.3.8",
+    "csv-parse": "^5.5.5"
+  }
+}
diff --git a/e621/parse.js b/e621/parse.js
new file mode 100755
index 0000000..b848708
--- /dev/null
+++ b/e621/parse.js
@@ -0,0 +1,39 @@
+// Merges tags.json and tag_aliases.json (written by csv2json.js) into
+// parsed_tags.json, an object keyed by tag name that match.js and update.js load.
+const fs = require('fs');
+
+const tags = require('./tags.json');
+const aliases = require('./tag_aliases.json');
+
+// Created without a prototype so that tag names such as "constructor" or
+// "__proto__" are stored and looked up like any other key.
+const final = Object.create(null);
+
+for (const tag of tags) {
+    final[tag.name] = {
+        id: tag.id,
+        allowedNames: [tag.name],
+        usage: tag.postCount,
+        category: tag.category,
+        derpibooruMatch: null
+    };
+}
+
+for (const alias of aliases) {
+    if (alias.status === "deleted" || alias.status === "pending") continue;
+
+    const aliasedTag = final[alias.consequentName];
+
+    if (aliasedTag === undefined) {
+        // As before, an alias whose target is missing stops the script before
+        // parsed_tags.json is written; the failure is now explicit and sets the exit code.
+        console.error(new Error("Alias target is not a known tag: " + alias.consequentName));
+        console.log(alias);
+        process.exitCode = 1;
+        return;
+    }
+
+    aliasedTag.allowedNames.push(alias.antecedentName);
+}
+
+fs.writeFileSync("parsed_tags.json", JSON.stringify(final, null, 2));
diff --git a/e621/update.js b/e621/update.js
new file mode 100755
index 0000000..7c44c73
--- /dev/null
+++ b/e621/update.js
@@ -0,0 +1,269 @@
+// Adds the e621 favourites of the account from credentials.json to the "images"
+// table of the SQLite database passed as first argument. The work is done on a
+// copy (database path + ".work.db") which replaces the original once everything succeeded.
+const { id: API_ID, key: API_KEY } = require('./credentials.json');
+const TOKEN = API_ID + ":" + API_KEY;
+
+const sqlite3 = require('sqlite3').verbose();
+const fs = require('fs');
+const tags = require('./parsed_tags.json');
+
+if (!process.argv[2]) {
+    console.log("Missing operand: please pass the origin database");
+    process.exitCode = 1;
+    return;
+}
+
+const ORIGIN = process.argv[2];
+const WORK = ORIGIN + ".work.db";
+
+console.log("Creating copy...");
+fs.copyFileSync(ORIGIN, WORK);
+
+console.log("Opening database...");
+const db = new sqlite3.Database(WORK);
+
+/**
+ * Runs a statement against the working database.
+ *
+ * @param {string} q - SQL text, with "?" placeholders for values.
+ * @param {Array} [params] - Values bound to the placeholders, in order.
+ * @returns {Promise} Resolves with every row returned by the statement.
+ */
+function sql(q, params = []) {
+    return new Promise((res, rej) => {
+        db.all(q, params, function (err, data) {
+            if (err) {
+                rej(err);
+            } else {
+                res(data);
+            }
+        });
+    });
+}
+
+/**
+ * @param {number} ms - Delay in milliseconds.
+ * @returns {Promise} Resolves once the delay has elapsed.
+ */
+function sleep(ms) {
+    return new Promise((res) => {
+        setTimeout(res, ms);
+    });
+}
+
+/**
+ * Rewrites the current terminal line with a label and a counter. Does nothing
+ * when stdout is not a terminal, where clearLine() and cursorTo() do not exist.
+ *
+ * @param {string} label - Text shown before the counter.
+ * @param {number} count - Number of items handled so far.
+ */
+function progress(label, count) {
+    if (!process.stdout.isTTY) return;
+
+    process.stdout.clearLine(0);
+    process.stdout.cursorTo(0);
+    process.stdout.write(label + " " + count);
+}
+
+/**
+ * Downloads the favourites of the account page by page (320 posts per
+ * request, one second between requests) until an empty page is returned.
+ *
+ * @returns {Promise} Resolves with the posts, as sent by the e621 API.
+ * @throws {Error} When a request fails or a response has no "posts" array.
+ */
+async function fetchFavourites() {
+    const list = [];
+    let page = 1;
+    let lastList;
+
+    do {
+        const res = await fetch("https://e621.net/posts.json?page=" + page + "&limit=320&tags=" + encodeURIComponent("fav:" + API_ID), {
+            headers: {
+                // btoa() only accepts Latin-1 text; Buffer encodes any string.
+                "Authorization": "Basic " + Buffer.from(TOKEN).toString("base64"),
+                "User-Agent": "Mozilla/5.0 (+Prisbeam; https://source.equestria.dev/equestria.dev/prisbeam)"
+            }
+        });
+
+        if (!res.ok) {
+            throw new Error("e621 answered with HTTP " + res.status + " for page " + page);
+        }
+
+        lastList = (await res.json())["posts"];
+
+        if (!Array.isArray(lastList)) {
+            throw new Error("e621 sent no \"posts\" array for page " + page);
+        }
+
+        list.push(...lastList);
+        page++;
+
+        progress("Gathering items...", list.length);
+        await sleep(1000);
+    } while (lastList.length > 0);
+
+    return list;
+}
+
+/**
+ * @param {string} ext - File extension, without the dot.
+ * @returns {string|undefined} MIME type, or undefined for an extension that is not listed.
+ */
+function extToMime(ext) {
+    switch (ext) {
+        case "jpg":
+        case "jpe":
+        case "jpeg":
+        case "jfif":
+        case "jif":
+        case "jfi":
+            return "image/jpeg";
+
+        case "png":
+            return "image/png";
+
+        case "gif":
+            return "image/gif";
+
+        case "swf":
+            return "application/x-shockwave-flash";
+
+        case "webm":
+            return "video/webm";
+    }
+}
+
+/**
+ * @param {string} rating - Rating of the post: "s", "q" or "e".
+ * @param {boolean} [text] - Truthy to get the name of the tag rather than its id.
+ * @returns {string|number|undefined} Name or id of the rating tag; undefined for any other rating.
+ */
+function getRatingTag(rating, text) {
+    switch (rating) {
+        case "s":
+            if (text) return "safe";
+            return 1040482;
+
+        case "q":
+            if (text) return "suggestive";
+            return 1043502;
+
+        case "e":
+            if (text) return "explicit";
+            return 1026707;
+    }
+}
+
+/**
+ * Builds the record stored in the "images" table for one e621 post.
+ *
+ * @param {object} image - A post, as sent by the e621 API.
+ * @returns {object} The record; property order matches what was stored previously.
+ */
+function toRecord(image) {
+    // [id, name] pairs of the post's tags that have a match in parsed_tags.json.
+    const matches = Object.values(image["tags"]).flat().filter(i => tags[i] && tags[i]["derpibooruMatch"]).map(i => tags[i]["derpibooruMatch"]);
+
+    return {
+        wilson_score: 0,
+        spoilered: false,
+        representations: {
+            full: image["file"]["url"],
+            large: image["file"]["url"],
+            medium: image["preview"]["url"],
+            small: image["preview"]["url"],
+            tall: image["preview"]["url"],
+            thumb: image["sample"]["has"] ? image["sample"]["url"] : image["preview"]["url"],
+            thumb_small: image["sample"]["has"] ? image["sample"]["url"] : image["preview"]["url"],
+            thumb_tiny: image["sample"]["has"] ? image["sample"]["url"] : image["preview"]["url"],
+        },
+        faves: 0,
+        aspect_ratio: image["file"]["width"] / image["file"]["height"],
+        duration: image["duration"] ?? 0,
+        thumbnails_generated: true,
+        tags: [
+            getRatingTag(image["rating"], true),
+            ...matches.map(m => m[1])
+        ],
+        created_at: image["created_at"],
+        tag_count: 0,
+        downvotes: 0,
+        id: Number.parseInt("20" + image["id"], 10),
+        source_id: image["id"],
+        source: "https://e621.net/posts/%s",
+        source_name: "e621",
+        name: image["file"]["md5"] + "." + image["file"]["ext"],
+        width: image["file"]["width"],
+        intensities: {
+            ne: 0,
+            nw: 0,
+            se: 0,
+            sw: 0
+        },
+        orig_sha512_hash: image["file"]["md5"],
+        deletion_reason: null,
+        processed: true,
+        animated: null,
+        height: image["file"]["height"],
+        description: image["description"],
+        sha512_hash: image["file"]["md5"],
+        source_urls: image["sources"],
+        upvotes: 0,
+        source_url: image["sources"][0] ?? null,
+        uploader_id: image["uploader_id"],
+        score: 0,
+        uploader: null,
+        first_seen_at: image["created_at"],
+        mime_type: extToMime(image["file"]["ext"]),
+        duplicate_of: null,
+        size: image["file"]["size"],
+        comment_count: 0,
+        view_url: image["file"]["url"],
+        hidden_from_users: false,
+        updated_at: image["updated_at"],
+        tag_ids: [
+            getRatingTag(image["rating"]),
+            ...matches.map(m => Number.parseInt("10" + m[0], 10))
+        ],
+        format: image["file"]["ext"]
+    };
+}
+
+/**
+ * Fetches the favourites, inserts them and swaps the working copy in.
+ */
+async function main() {
+    process.stdout.write("Gathering items...");
+    const list = await fetchFavourites();
+
+    process.stdout.write("\nProcessing images...");
+    let i = 0;
+
+    for (const image of list) {
+        const readyData = Buffer.from(JSON.stringify(toRecord(image))).toString("base64");
+        // Bound as a parameter: a double-quoted value is an identifier in standard SQL.
+        await sql("INSERT INTO images VALUES (?)", [readyData]);
+
+        i++;
+        progress("Processing images...", i);
+    }
+
+    console.log("\nClosing database...");
+    await new Promise((res, rej) => {
+        db.close((err) => (err ? rej(err) : res()));
+    });
+
+    // Reached only when the working copy was closed without error.
+    fs.copyFileSync(WORK, ORIGIN);
+    fs.unlinkSync(WORK);
+    console.log("Done!");
+}
+
+main().catch((err) => {
+    // The origin database is left untouched; the working copy stays on disk.
+    console.error(err);
+    process.exitCode = 1;
+});
-- 
cgit