summaryrefslogtreecommitdiff
path: root/src/detect.js
blob: 1319fff97564d68af75c40d004703fe5bdd0f255 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
splangs = require('./languages.json');

/**
 * Build a new object holding the same key/value pairs as `object`, inserted
 * in the opposite order of their enumeration.
 *
 * Keys are gathered with `for...in`, so inherited enumerable properties are
 * included and become own properties of the result. The input is not mutated.
 * Note that JavaScript always enumerates integer-like keys first, in ascending
 * order, so such keys cannot actually be reordered this way.
 *
 * @param {Object} object - Source object whose enumeration order is reversed.
 * @returns {Object} A fresh object with the keys inserted last-to-first.
 */
function reverseObject(object) {
    const enumerated = [];
    for (const name in object) {
        enumerated.push(name);
    }

    const flipped = {};
    enumerated.reverse().forEach((name) => {
        flipped[name] = object[name];
    });

    return flipped;
}

// Command-line entry point: takes the text to analyse from process.argv[2],
// scores it against the language data, and prints the three best-ranked
// languages together with a timing breakdown of each phase.
//
// NOTE(review): almost every variable below is assigned without a declaration
// and therefore becomes an implicit global. `langs` and `ent` are read here but
// never assigned in this block — presumably they are globals created by another
// module (possibly ./status); confirm before declaring anything with
// const/let or reordering statements.
try {
    // Start timestamps: `old` for the whole run, `lold` for the module-loading phase.
    old = new Date();
    lold = new Date();

    const diacritics = require('./diacritics');
    const count = require('./counter');
    const percentages = require('./percentages');
    const differenciate = require('./differenciate');
    const bestLanguage = require('./better');
    const prettylang = require('./prettylang');
    const wordsDetect = require('./wdetect');

    // Implicit globals. `fs` is not used anywhere in this block.
    // NOTE(review): presumably other modules read these as globals — verify.
    fs = require('fs');
    chalk = require('chalk');

    // Thin Error subclass used for argument problems; it adds no behaviour of its own.
    class LangdetectError extends Error {
        constructor(...params) {
            super(...params)
            // We're spreading `params` as a way to bring all of `Error`'s functionality in.
        }
    }

    // The text to analyse is the first CLI argument. This error is rethrown
    // unchanged by the catch clause at the bottom of the file.
    if (typeof process.argv[2] == "undefined") {
        throw new LangdetectError("args: Missing operand");
    }

    console.log("Let's try to detect which language this is!");

    // End of the "loading required files" phase. ./status is loaded after this
    // point, so its load time is reported under "other things".
    lrec = new Date();

    // Loaded for its side effects only; the return value is discarded.
    require("./status");

    // --- Phase: decompose the text into letters and compute their proportions ---
    told = new Date();
    console.log("Decomposing text... This will take a while for long texts!");
    // Lower-case the input and run it through ./diacritics
    // (presumably replaces accented characters with plain ones — TODO confirm).
    clean = diacritics(process.argv[2].toLowerCase());
    // `cleaner` keeps ASCII letters only; `cleaner2` keeps ASCII letters and spaces.
    cleaner = clean.replace(/[^a-zA-Z]/g, "");
    cleaner2 = clean.replace(/[^a-zA-Z ]/g, "");
    // Word total shown in the results. Commas were already stripped above, so in
    // practice this splits on runs of spaces.
    wcwords = cleaner2.split(/[, ]+/).length;
    letters = cleaner.split("");
    console.log("Text contains " + letters.length + " letters");
    console.log("Calculating percentage for all letters...");
    qty = count(letters);
    // Sanity check: the counter's total should equal the number of letters passed in.
    if (qty.total != letters.length) {
        console.log(chalk.red(chalk.bold("WARNING: ") + (letters.length - qty.total) + " characters were not included while counting all letters. This is probably a bug and should be reported."));
    }
    percs = percentages(qty);
    trec = new Date();
    tdiff = trec - told;

    // --- Phase: compare the letter percentages against each language in `langs` ---
    mold = new Date();
    console.log("Getting matches for each letter in each supported language...");
    diffs = differenciate(percs, langs);
    mrec = new Date();
    mdiff = mrec - mold

    // --- Phase: turn the per-letter comparison into per-language scores ---
    bold = new Date();
    console.log("Finding the best matches between all supported languages...");
    precounts = bestLanguage(diffs);
    brec = new Date();
    bdiff = brec - bold;

    // --- Phase: factor recognised words into the scores ---
    wold = new Date();
    console.log("Including proportion of recognized words...");
    newcounts = wordsDetect(precounts, cleaner2, langs);
    // The "_words" entry is pulled out into `wcount` (indexed by language key
    // below) and removed so that only language scores remain in `newcounts`.
    wcount = newcounts["_words"];
    delete newcounts["_words"];
    // Rebuild the score object with keys inserted in ascending score order,
    // then reverse the key order so the best-scoring language comes first.
    preprecounts = Object.entries(newcounts)
        .sort(([,a],[,b]) => a-b)
        .reduce((r, [k, v]) => ({ ...r, [k]: v }), {});
    counts = reverseObject(preprecounts);
    wrec = new Date();
    wdiff = wrec - wold;

    // Total run time; the last item in the report is whatever time is not
    // accounted for by the five measured phases.
    rec = new Date();
    ddiff = rec - old;
    ldiff = lrec - lold;
    console.log(chalk.green("Done in " + Math.ceil(ddiff) + " ms:\n * " + Math.ceil(ldiff) + " ms loading required files\n * " + Math.ceil(tdiff) + " ms decomposing text\n * " + Math.ceil(mdiff) + " ms gathering matches\n * " + Math.ceil(bdiff) + " ms finding the best matches\n * " + Math.ceil(wdiff) + " ms including known words\n * " + Math.ceil(ddiff - (ldiff + tdiff + mdiff + bdiff + wdiff)) + " ms doing other things"));

    // `ent` is not assigned in this block (see the note at the top);
    // the warning fires when it is below 600.
    if (ent < 600) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "Not enough entries to have accurate results: " + ent + "/600 (" + Math.round((ent/600)*100) + "%)"));
    }

    // Language keys in ranked order (best first) and the sum of all scores,
    // which is the denominator for the percentages printed below.
    keys = Object.keys(counts);
    total = 0;
    keys.forEach((key) => {
        total = total + counts[key];
    })

    // Same ascending-sort-then-reverse treatment for the word counts. After this
    // point `wcount` is only ever looked up by key, never iterated.
    prewcount = Object.entries(wcount)
        .sort(([,a],[,b]) => a-b)
        .reduce((r, [k, v]) => ({ ...r, [k]: v }), {});
    wcount = reverseObject(prewcount);

    // Only the word count of the top-ranked language is checked here.
    if (wcount[keys[0]] < 1) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "No words detected in text, results may not be very accurate"));
    }

    // Input-size warnings: fewer than 16 or more than 1000 letters.
    if (letters.length < 16) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "Not enough letters to get accurate results, consider adding a whole sentence instead of isolated words."));
    }
    if (letters.length > 1000) {
        console.log(chalk.red(chalk.bold("WARNING: ") + "Too much letters, consider detecting only a single sentence instead of a whole document."));
    }

    // Print the top three languages: share of the total score plus recognised
    // words out of `wcwords`. The first is bold; the second and third are
    // printed in gray when their score is not greater than zero.
    console.log("\nResults:");
    console.log(chalk.bold(" 1. " + prettylang(keys[0]) + " (" + Math.round((counts[keys[0]] / total) * 100) + "%, " + wcount[keys[0]] + "/" + wcwords + " words)"));
    if (counts[keys[1]] > 0) {
        console.log(" 2. " + prettylang(keys[1]) + " (" + Math.round((counts[keys[1]] / total) * 100) + "%, " + wcount[keys[1]] + "/" + wcwords + " words)");
    } else {
        console.log(chalk.gray(" 2. " + prettylang(keys[1]) + " (" + Math.round((counts[keys[1]] / total) * 100) + "%, " + wcount[keys[1]] + "/" + wcwords + " words)"));
    }
    if (counts[keys[2]] > 0) {
        console.log(" 3. " + prettylang(keys[2]) + " (" + Math.round((counts[keys[2]] / total) * 100) + "%, " + wcount[keys[2]] + "/" + wcwords + " words)");
    } else {
        console.log(chalk.gray(" 3. " + prettylang(keys[2]) + " (" + Math.round((counts[keys[2]] / total) * 100) + "%, " + wcount[keys[2]] + "/" + wcwords + " words)"));
    }
} catch (e) {
    // Errors are matched by message text: "Unexpected end of JSON input" is what
    // JSON.parse reports for empty or truncated input, and is presented to the
    // user as a database problem. Every other error is rethrown untouched.
    // NOTE(review): presumably the parse happens in one of the required
    // modules; no JSON.parse call is visible in this block.
    if (e.message == "Unexpected end of JSON input") {
        console.log(require('chalk').red("Unable to open database file. Is the databased opened by another program? Or is it corrupted?"));
    } else {
        throw e;
    }
}