diff options
author | Minteck <contact@minteck.org> | 2021-12-21 16:52:28 +0100 |
---|---|---|
committer | Minteck <contact@minteck.org> | 2021-12-21 16:52:28 +0100 |
commit | 46e43f4bde4a35785b4997b81e86cd19f046b69b (patch) | |
tree | c53c2f826f777f9d6b2d249dab556feb72a6c3a6 /src/filer.js | |
download | langdetect-46e43f4bde4a35785b4997b81e86cd19f046b69b.tar.gz langdetect-46e43f4bde4a35785b4997b81e86cd19f046b69b.tar.bz2 langdetect-46e43f4bde4a35785b4997b81e86cd19f046b69b.zip |
Commit
Diffstat (limited to 'src/filer.js')
-rw-r--r-- | src/filer.js | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/src/filer.js b/src/filer.js new file mode 100644 index 0000000..a93f0b4 --- /dev/null +++ b/src/filer.js @@ -0,0 +1,42 @@ +const fs = require('fs');
+const api = require('./api');
+
+/**
+ * Left-pads a value with zeros up to a minimum width.
+ *
+ * @param {number|string} n - Value to pad; converted with String().
+ * @param {number} [z=2] - Minimum width. Any falsy value (including 0) falls
+ *   back to 2.
+ * @returns {string} `n` prefixed with as many '0' characters as needed to
+ *   reach `z` characters. A value already at least `z` wide is returned whole.
+ */
+function pad(n, z) {
+  const width = z || 2;
+  const text = String(n);
+  // Never cut the value down to `width`: truncating would turn 100 (hours)
+  // into "00". Only add zeros when the text is shorter than requested.
+  return '0'.repeat(Math.max(0, width - text.length)) + text;
+}
+
+/**
+ * Formats a duration given in milliseconds as "hours:minutes:seconds".
+ *
+ * The sub-second remainder is discarded (not rounded) and the hours field is
+ * not wrapped at 24. Each field is formatted by pad() with its default width.
+ *
+ * @param {number} s - Duration in milliseconds.
+ * @returns {string} The three fields joined with ':'.
+ */
+function msToTime(s) {
+  // Each step removes the remainder before dividing, so for integer input
+  // every intermediate value stays a whole number.
+  const wholeSeconds = (s - (s % 1000)) / 1000;
+  const secs = wholeSeconds % 60;
+  const wholeMinutes = (wholeSeconds - secs) / 60;
+  const mins = wholeMinutes % 60;
+  const hrs = (wholeMinutes - mins) / 60;
+
+  return [hrs, mins, secs].map((part) => pad(part)).join(':');
+}
+
+/**
+ * Passes every sentence of every file under ./train/<lang> to the `api`
+ * module, logging progress and a rough time estimate after each sentence.
+ *
+ * Reads the globals `__train_totallangs` and `__train_donelangs`, which are
+ * not defined in this file and must be set before this function is called.
+ *
+ * @param {string} lang - Name of the sub-directory of ./train to process; also
+ *   forwarded to `api` as its second argument.
+ * @returns {void}
+ * @throws {Error} Anything thrown by fs.readdirSync / fs.readFileSync when the
+ *   directory or one of its entries cannot be read.
+ */
+module.exports = (lang) => {
+  console.log("Reading files at /train/" + lang + "/...");
+  const files = fs.readdirSync("./train/" + lang);
+  const fileWidth = files.length.toString().length;
+
+  files.forEach((file, index) => {
+    console.log("Processing file /train/" + lang + "/" + file);
+    const content = fs.readFileSync("./train/" + lang + "/" + file).toString();
+    // match() returns null when nothing matches (e.g. an empty file); treat
+    // that as zero sentences instead of failing on `.length`.
+    const sentences = content.match(/([^\.!\?]+[\.!\?]+)|([^\.!\?]+$)/g) || [];
+    const total = sentences.length;
+    const width = total.toString().length;
+    // Kept local to this file's loop so no state leaks into the global scope.
+    let etaMs = 0;
+
+    sentences.forEach((sentence, done) => {
+      // NOTE(review): the timing assumes api() does its work synchronously - confirm.
+      const startedAt = Date.now();
+      api(sentence, lang, true);
+      const elapsedMs = Date.now() - startedAt;
+
+      // Refresh the estimate on every 50th sentence (starting with the first):
+      // duration of this call times the number of sentences still to do.
+      if (done % 50 === 0) {
+        etaMs = elapsedMs * (total - done);
+      }
+
+      console.log(
+        lang + " - " + pad(done, width) + "/" + total +
+        " (" + pad(Math.round((done / total) * 100), 3) + "%, " + pad(total - done, width) + ") - " +
+        pad(index + 1, fileWidth) + "/" + files.length +
+        " - " + msToTime(etaMs) +
+        " - " + msToTime(etaMs * (__train_totallangs - __train_donelangs))
+      );
+    });
+  });
+};
\ No newline at end of file |