diff options
author | Minteck <contact@minteck.org> | 2022-04-09 16:39:03 +0200 |
---|---|---|
committer | Minteck <contact@minteck.org> | 2022-04-09 16:40:02 +0200 |
commit | 0f8967b9113d698cdeb2d05ca85d2d9a80461c24 (patch) | |
tree | 00664ddd9c55a2c33631fd1bd33e556cea9c67e5 /main.py | |
parent | dac03ac82bc0f8044a4b339c27b5390e4dcecf2f (diff) | |
download | voicer-trunk.tar.gz voicer-trunk.tar.bz2 voicer-trunk.zip |
Diffstat (limited to 'main.py')
-rw-r--r-- | main.py | 181 |
1 files changed, 181 insertions, 0 deletions
#!/usr/bin/env python3
"""Voice-command listener.

Recognises speech from an audio input device with Vosk and dispatches
utterances that start with a configured prefix to action handlers.
"""

import argparse
import json
import locale
import os
import queue
import subprocess
import sys

import sounddevice as sd
import vosk

# Start-up cue, played with the `afplay` command-line player. An argument
# list is used so that no shell is involved.
subprocess.run(["afplay", "sounds/load.wav"], check=True)


def say(message="Une erreur s'est produite"):
    """Speak *message* aloud with the ``say`` command and the "Audrey" voice.

    The text is handed over as a single argv entry instead of being spliced
    into a shell command line, so quotes, ``$``, backticks, backslashes and
    apostrophes reach the synthesiser unchanged and can never be interpreted
    by a shell.

    Args:
        message: Text to speak.

    Raises:
        subprocess.CalledProcessError: ``say`` exited with a non-zero status.
        FileNotFoundError: the ``say`` executable is not available.
    """
    subprocess.run(["say", "-v", "Audrey", message], check=True)


def runaction(action="_default", input=""):
    """Invoke the handler registered for *action*, passing it *input*.

    Handlers are module-level callables named ``_action_<action>``; they are
    presumably defined by the scripts under ``./actions/`` that are executed
    into this module's namespace further down.

    Args:
        action: Handler name without the ``_action_`` prefix.
        input: Text given to the handler. The name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Raises:
        KeyError: no handler named ``_action_<action>`` exists.
    """
    # Handlers only ever live in the module namespace; a function-local
    # lookup could never find one, so go straight to globals().
    handler = globals()["_action_" + action]
    handler(input)


def _load_action_script(name):
    """Execute ``./actions/<name>.py`` inside this module's global namespace."""
    path = "./actions/" + name + ".py"
    with open(path, "r", encoding="utf-8") as script:
        source = script.read()
    # NOTE(review): exec() runs arbitrary code. The script names come from
    # actions.json, so that file and the actions directory must be trusted.
    # Compiling with the real path makes tracebacks point at the script.
    exec(compile(source, path, "exec"), globals())


# Audio blocks produced by callback() and consumed by the recognition loop.
q = queue.Queue()

with open("actions.json", "r", encoding="utf-8") as fh:
    actions = json.load(fh)
with open("prefixes.json", "r", encoding="utf-8") as fh:
    prefixes = json.load(fh)

# LC_ALL already covers LC_TIME, so a single call is sufficient.
locale.setlocale(locale.LC_ALL, "fr_FR")

for action in actions:
    _load_action_script(action["action"])

# These two handlers are used as fallbacks and are always loaded.
_load_action_script("_default")
_load_action_script("wolfram")


def int_or_str(text):
    """Helper function for argument parsing.

    Returns *text* converted to ``int`` when possible, otherwise *text*
    unchanged.
    """
    try:
        return int(text)
    except ValueError:
        return text


def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Prints a truthy *status* to stderr and queues the raw bytes of *indata*
    on ``q``. *frames* and *time* are accepted but not used.
    """
    if status:
        print(status, file=sys.stderr)
    q.put(bytes(indata))


# First pass: only look for --list-devices so the device list can be shown
# without requiring any other option.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
    '-l', '--list-devices', action='store_true',
    help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
    print(sd.query_devices())
    parser.exit(0)

# Second pass: the full parser, inheriting --list-devices from the first one.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[parser])
parser.add_argument(
    '-f', '--filename', type=str, metavar='FILENAME',
    help='audio file to store recording to')
parser.add_argument(
    '-m', '--model', type=str, metavar='MODEL_PATH',
    help='Path to the model')
parser.add_argument(
    '-d', '--device', type=int_or_str,
    help='input device (numeric ID or substring)')
parser.add_argument(
    '-r', '--samplerate', type=int, help='sampling rate')
args = parser.parse_args(remaining)


def _last_matching_prefix(utterance):
    """Return the last entry of ``prefixes`` that *utterance* starts with, or None."""
    found = None
    for candidate in prefixes:
        if utterance.startswith(candidate):
            found = candidate
    return found


# NOTE(review): the nesting below was reconstructed from a rendering of the
# script in which indentation had been collapsed. Please confirm in particular
# that the " xxx " branch belongs to `if process:` and that `listening = False`
# runs after every final result.
dump_fn = None
try:
    if args.model is None:
        args.model = "model"
    if not os.path.exists(args.model):
        # A missing model is a failure: report it on stderr with a non-zero
        # exit status instead of exiting "successfully".
        parser.exit(
            1,
            "Please download a model for your language from https://alphacephei.com/vosk/models\n"
            "and unpack as 'model' in the current folder.\n")
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, 'input')
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info['default_samplerate'])

    model = vosk.Model(args.model)

    if args.filename:
        # Closed in the `finally` clause at the bottom.
        dump_fn = open(args.filename, "wb")

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device,
                           dtype='int16', channels=1, callback=callback):
        print('#' * 80)
        print('Press Ctrl+C to stop the recording')
        print('#' * 80)

        subprocess.run(["afplay", "sounds/ready.wav"], check=True)

        rec = vosk.KaldiRecognizer(model, args.samplerate)
        listening = False
        while True:
            data = q.get()
            if rec.AcceptWaveform(data):
                # The recogniser has a final result for the utterance.
                text = json.loads(rec.Result())["text"]
                otext = text  # unmodified recogniser output
                heard = text.strip()
                used_prefix = _last_matching_prefix(heard)
                process = used_prefix is not None
                if process:
                    # Slice the same stripped string the prefix was matched
                    # against, so leading whitespace cannot shift the cut.
                    text = heard[len(used_prefix):].strip()
                    if len(text) > 2:
                        subprocess.run(["afplay", "sounds/stop.wav"], check=True)
                        sys.stdout.write("\033[K")
                        print(" >>> " + text)
                        words = text.split(" ")

                        # An action is selected when every word of one of its
                        # triggers was spoken; the last matching action wins.
                        sact = None
                        for action in actions:
                            for trigger in action["triggers"]:
                                if all(tw in words for tw in trigger):
                                    sact = action

                        if sact is not None:
                            try:
                                runaction(sact["action"], text)
                            except Exception as e:
                                print(e)
                                say("Une erreur s'est produite, réessayez à nouveau dans quelques instants.")
                        else:
                            # Nothing matched: try the "wolfram" handler and
                            # fall back to "_default" if it fails.
                            try:
                                runaction("wolfram", text)
                            except Exception as e:
                                print(e)
                                runaction("_default", text)
                else:
                    if len(text) > 0:
                        print(" xxx " + text)
                listening = False
            else:
                # Still mid-utterance: show live feedback on a single line.
                text = json.loads(rec.PartialResult())["partial"]
                heard = text.strip()
                matched = _last_matching_prefix(heard)
                process = matched is not None
                # Erase the previous feedback line before redrawing it.
                sys.stdout.write("\033[K")
                if process:
                    used_prefix = matched
                    listening = True
                    print("> " + heard[len(used_prefix):].strip(), end='\r')
                elif len(heard) > 0:
                    print("(" + text + ")", end='\r')
                else:
                    print("...", end='\r')
            if dump_fn is not None:
                dump_fn.write(data)

except KeyboardInterrupt:
    print('\nDone')
    parser.exit(0)
finally:
    # Make sure the optional raw-audio dump is flushed and closed on any exit.
    if dump_fn is not None:
        dump_fn.close()