// src/detect.js -- command-line entry point: guesses the language of the text
// passed as the first command-line argument (process.argv[2]) and prints the
// three best candidates.
//
// Provenance: file created by commit 46e43f4bde4a35785b4997b81e86cd19f046b69b
// ("Commit", Minteck, Tue, 21 Dec 2021 16:52:28 +0100).
//
// NOTE(review): this script intentionally keeps its implicit-global assignment
// style (no `const`/`let` on most names, no 'use strict'). `fs` is assigned
// below but never read in this file, and `ent` is read but never assigned
// here, so sibling modules presumably exchange data through globals -- verify
// before converting any of these names to local declarations.

splangs = require('./languages.json');

/**
 * Builds a shallow copy of `object` whose own enumerable keys are inserted in
 * reverse order. Integer-like keys are always enumerated first by the engine,
 * so the reversal is only meaningful for ordinary string keys.
 *
 * @param {Object} object - Plain object to copy.
 * @returns {Object} New object with the keys inserted last-to-first.
 */
function reverseObject(object) {
    const reversed = {};

    Object.keys(object).reverse().forEach((key) => {
        reversed[key] = object[key];
    });

    return reversed;
}

/**
 * Builds a copy of `scores` whose keys are inserted by ascending numeric value.
 * The object is filled in place (linear) instead of being re-spread on every
 * entry (quadratic); the resulting key order is the same.
 *
 * @param {Object<string, number>} scores - Map of key to numeric score.
 * @returns {Object<string, number>} New object ordered from lowest to highest score.
 */
function sortAscendingByValue(scores) {
    const ascending = {};

    Object.entries(scores)
        .sort(([, a], [, b]) => a - b)
        .forEach(([key, value]) => {
            ascending[key] = value;
        });

    return ascending;
}

try {
    // Timestamps: `old` starts the overall timer, `lold` the module-loading timer.
    old = new Date();
    lold = new Date();

    const diacritics = require('./diacritics');
    const count = require('./counter');
    const percentages = require('./percentages');
    const differenciate = require('./differenciate');
    const bestLanguage = require('./better');
    const prettylang = require('./prettylang');
    const wordsDetect = require('./wdetect');

    fs = require('fs');
    chalk = require('chalk');

    class LangdetectError extends Error {
        constructor(...params) {
            // Forward every argument so all of `Error`'s functionality is kept.
            super(...params);
        }
    }

    if (typeof process.argv[2] === "undefined") {
        // Not handled by the catch below: it is rethrown as-is.
        throw new LangdetectError("args: Missing operand");
    }

    console.log("Let's try to detect which language this is!");

    lrec = new Date();

    require("./status");

    // `langs` is read below, but this file only ever assigns `splangs`.
    // If nothing else defined `langs` (e.g. the module loaded just above),
    // fall back to the language database instead of failing with a ReferenceError.
    if (typeof langs === "undefined") {
        langs = splangs;
    }

    // --- Step 1: normalise the text and compute letter frequencies -------------
    told = new Date();
    console.log("Decomposing text... This will take a while for long texts!");
    clean = diacritics(process.argv[2].toLowerCase());
    cleaner = clean.replace(/[^a-zA-Z]/g, "");
    cleaner2 = clean.replace(/[^a-zA-Z ]/g, "");
    // Drop the empty tokens that split() yields for leading/trailing blanks (and
    // for empty text) so they are not counted as words.
    wcwords = cleaner2.split(/[, ]+/).filter((word) => word !== "").length;
    letters = cleaner.split("");
    console.log("Text contains " + letters.length + " letters");
    console.log("Calculating percentage for all letters...");
    qty = count(letters);
    if (qty.total != letters.length) {
        console.log(chalk.red(chalk.bold("WARNING: ") + (letters.length - qty.total) + " characters were not included while counting all letters. This is probably a bug and should be reported."));
    }
    percs = percentages(qty);
    trec = new Date();
    tdiff = trec - told;

    // --- Step 2: compare the letter frequencies with every language ------------
    mold = new Date();
    console.log("Getting matches for each letter in each supported language...");
    diffs = differenciate(percs, langs);
    mrec = new Date();
    mdiff = mrec - mold;

    // --- Step 3: turn the comparisons into per-language scores -----------------
    bold = new Date();
    console.log("Finding the best matches between all supported languages...");
    precounts = bestLanguage(diffs);
    brec = new Date();
    bdiff = brec - bold;

    // --- Step 4: factor in recognised words, then rank best-first --------------
    wold = new Date();
    console.log("Including proportion of recognized words...");
    newcounts = wordsDetect(precounts, cleaner2, langs);
    // The per-language word counts travel under the "_words" key; pull them out
    // so they are not ranked as if they were a language.
    wcount = newcounts["_words"];
    delete newcounts["_words"];
    preprecounts = sortAscendingByValue(newcounts);
    counts = reverseObject(preprecounts);
    wrec = new Date();
    wdiff = wrec - wold;

    // --- Timing report ----------------------------------------------------------
    rec = new Date();
    ddiff = rec - old;
    ldiff = lrec - lold;
    console.log(chalk.green(
        "Done in " + Math.ceil(ddiff) + " ms:\n * "
        + Math.ceil(ldiff) + " ms loading required files\n * "
        + Math.ceil(tdiff) + " ms decomposing text\n * "
        + Math.ceil(mdiff) + " ms gathering matches\n * "
        + Math.ceil(bdiff) + " ms finding the best matches\n * "
        + Math.ceil(wdiff) + " ms including known words\n * "
        + Math.ceil(ddiff - (ldiff + tdiff + mdiff + bdiff + wdiff)) + " ms doing other things"
    ));

    // NOTE(review): `ent` is not assigned anywhere in this file; presumably it is
    // a global set by another module (./status?) -- confirm.
    if (ent < 600) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "Not enough entries to have accurate results: " + ent + "/600 (" + Math.round((ent/600)*100) + "%)"));
    }

    // `keys` holds the language keys ordered from best to worst score.
    keys = Object.keys(counts);
    total = 0;
    keys.forEach((key) => {
        total = total + counts[key];
    });

    prewcount = sortAscendingByValue(wcount);
    wcount = reverseObject(prewcount);

    // --- Accuracy warnings ------------------------------------------------------
    if (wcount[keys[0]] < 1) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "No words detected in text, results may not be very accurate"));
    }

    if (letters.length < 16) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "Not enough letters to get accurate results, consider adding a whole sentence instead of isolated words."));
    }
    if (letters.length > 1000) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "Too much letters, consider detecting only a single sentence instead of a whole document."));
    }

    // --- Results ----------------------------------------------------------------
    // Share of the summed scores as a whole percentage; 0 when there is nothing
    // to divide by, so the output never shows "NaN%".
    const percentOfTotal = (score) => (total === 0 ? 0 : Math.round((score / total) * 100));

    console.log("\nResults:");
    // Print at most three candidates, and only those that exist: the best one in
    // bold, the others plain when they scored above zero and grayed out otherwise.
    keys.slice(0, 3).forEach((key, index) => {
        const line = " " + (index + 1) + ". " + prettylang(key) + " (" + percentOfTotal(counts[key]) + "%, " + wcount[key] + "/" + wcwords + " words)";

        if (index === 0) {
            console.log(chalk.bold(line));
        } else if (counts[key] > 0) {
            console.log(line);
        } else {
            console.log(chalk.gray(line));
        }
    });
} catch (e) {
    // Only the JSON parser's "unexpected end" failure gets a friendly message;
    // every other error (including LangdetectError) is rethrown untouched.
    // chalk is required again here because the global may not be set yet.
    if (e.message === "Unexpected end of JSON input") {
        console.log(require('chalk').red("Unable to open database file. Is the databased opened by another program? Or is it corrupted?"));
    } else {
        throw e;
    }
}