summaryrefslogtreecommitdiff
path: root/src/train.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/train.js')
-rw-r--r--src/train.js62
1 files changed, 62 insertions, 0 deletions
diff --git a/src/train.js b/src/train.js
new file mode 100644
index 0000000..391039a
--- /dev/null
+++ b/src/train.js
@@ -0,0 +1,62 @@
+try {
+ const sslangs = require('./languages.json');
+
+ const diacritics = require('./diacritics');
+ const count = require('./counter');
+ const percentages = require('./percentages');
+ const save = require('./savers');
+ const worder = require('./words');
+
+ const fs = require('fs');
+
+ class LangdetectError extends Error {
+ constructor(...params) {
+ super(...params)
+ // We're spreading `params` as a way to bring all of `Error`'s functionality in.
+ }
+ }
+
+ if (typeof process.argv[2] == "undefined" || typeof process.argv[3] == "undefined") {
+ throw new LangdetectError("args: Missing operand");
+ }
+
+ sslangs.forEach((lg) => {
+ if (process.argv[3] == lg.code) {
+ plang = lg.name;
+ lit = lg;
+ }
+ })
+
+ if (typeof plang == "undefined") {
+ throw new LangdetectError("lang: Invalid language");
+ }
+
+ console.log("Decomposing text... This will take a while for long texts!");
+ clean = diacritics(process.argv[2].toLowerCase());
+ cleaner = clean.replace(/[^a-zA-Z]/g, "");
+ cleaner2 = clean.replace(/[^a-zA-Z ]/g, "");
+ letters = cleaner.split("");
+ words = worder(cleaner2, lit);
+ console.log("Text contains " + letters.length + " letters");
+ console.log("Calculating percentage for all letters...");
+ qty = count(letters);
+ if (qty.total != letters.length) {
+ console.log("WARNING: " + (letters.length - qty.total) + " characters were not included while counting all letters. This is probably a bug and should be reported.");
+ }
+ percs = percentages(qty);
+ console.log("According to parameter, this is " + plang + " (langcode: " + process.argv[3] + "), so let's add this to the database");
+
+ if (fs.existsSync("./data/" + process.argv[3] + ".dat")) {
+ console.log("Database " + process.argv[3] + ".dat exists, loading it");
+ } else {
+ console.log("Database " + process.argv[3] + ".dat doesn't exists, creating it");
+ fs.writeFileSync("./data/" + process.argv[3] + ".dat", "{\"a\":{\"entries\":[],\"average\":0},\"b\":{\"entries\":[],\"average\":0},\"c\":{\"entries\":[],\"average\":0},\"d\":{\"entries\":[],\"average\":0},\"e\":{\"entries\":[],\"average\":0},\"f\":{\"entries\":[],\"average\":0},\"g\":{\"entries\":[],\"average\":0},\"h\":{\"entries\":[],\"average\":0},\"i\":{\"entries\":[],\"average\":0},\"j\":{\"entries\":[],\"average\":0},\"k\":{\"entries\":[],\"average\":0},\"l\":{\"entries\":[],\"average\":0},\"m\":{\"entries\":[],\"average\":0},\"n\":{\"entries\":[],\"average\":0},\"o\":{\"entries\":[],\"average\":0},\"p\":{\"entries\":[],\"average\":0},\"q\":{\"entries\":[],\"average\":0},\"r\":{\"entries\":[],\"average\":0},\"s\":{\"entries\":[],\"average\":0},\"t\":{\"entries\":[],\"average\":0},\"u\":{\"entries\":[],\"average\":0},\"v\":{\"entries\":[],\"average\":0},\"w\":{\"entries\":[],\"average\":0},\"x\":{\"entries\":[],\"average\":0},\"y\":{\"entries\":[],\"average\":0},\"z\":{\"entries\":[],\"average\":0},\"words\":[]}")
+ }
+ save(percs, words, "./data/" + process.argv[3] + ".dat");
+} catch (e) {
+ if (e.message == "Unexpected end of JSON input") {
+ console.log("Unable to open database file. Is the databased opened by another program? Or is it corrupted?");
+ } else {
+ throw e;
+ }
+} \ No newline at end of file