#!/usr/bin/env python3
# Voice assistant driven by Vosk speech recognition.
#
# Microphone audio is streamed through a Vosk recognizer.  When a recognized
# sentence starts with one of the prefixes listed in prefixes.json, the rest
# of the sentence is matched against the triggers in actions.json and the
# corresponding "_action_<name>" handler (loaded from ./actions/<name>.py) is
# called.  Sentences that match no trigger go to the "wolfram" handler, then
# to "_default" if that fails.  Spoken feedback uses the macOS `say` command.
#
# NOTE: action files are exec'd into this module's globals, so module-level
# names (say, runaction, text, otext, ...) are kept at top level on purpose:
# handlers may rely on them.
import argparse
import json
import locale
import os
import queue
import subprocess
import sys

import sounddevice as sd
import vosk


def _play_sound(name):
    """Play ``sounds/<name>.wav`` with ``afplay`` and wait until it finishes.

    Raises ``subprocess.CalledProcessError`` if the player exits non-zero.
    """
    subprocess.run(["afplay", "sounds/" + name + ".wav"], check=True)


_play_sound("load")


def say(message="Une erreur s'est produite"):
    """Speak *message* aloud with the macOS ``say`` command (voice "Audrey").

    The message is passed as a single argv element instead of being spliced
    into a shell command line, so quotes, backticks, ``$`` and similar
    characters are spoken as text and can never be interpreted by a shell.

    Raises ``subprocess.CalledProcessError`` if ``say`` exits non-zero.
    """
    subprocess.run(["say", "-v", "Audrey", message], check=True)


def runaction(action="_default", input=""):
    """Call the handler ``_action_<action>`` with *input*.

    Handlers are module-level functions defined by the exec'd action files.
    Raises ``KeyError`` when no handler with that name has been loaded; any
    exception raised by the handler itself propagates to the caller.
    """
    handler = globals()["_action_" + action]
    handler(input)


def _read_json(path):
    """Return the parsed content of the JSON file at *path*."""
    with open(path, "r") as handle:
        return json.load(handle)


def _load_action(name):
    """Execute ``./actions/<name>.py`` in this module's global namespace."""
    path = "./actions/" + name + ".py"
    with open(path) as handle:
        source = handle.read()
    # NOTE(review): exec() runs the action file with full privileges; only
    # trusted files must ever be placed in ./actions or named in actions.json.
    # Compiling with the real path makes tracebacks point at the action file.
    exec(compile(source, path, "exec"), globals())


def _match_prefix(text, candidates):
    """Return the last entry of *candidates* that *text* starts with, or None."""
    matched = None
    for candidate in candidates:
        if text.startswith(candidate):
            matched = candidate
    return matched


def _select_action(words, candidates):
    """Return the last action having a trigger whose words all occur in *words*.

    Each candidate is a dict with a "triggers" list; each trigger is a
    sequence of words.  Returns None when no trigger matches.
    """
    chosen = None
    for candidate in candidates:
        for trigger in candidate["triggers"]:
            if all(word in words for word in trigger):
                chosen = candidate
    return chosen


q = queue.Queue()
actions = _read_json("actions.json")
prefixes = _read_json("prefixes.json")

locale.setlocale(locale.LC_TIME, "fr_FR")
locale.setlocale(locale.LC_ALL, "fr_FR")

for action in actions:
    _load_action(action["action"])
_load_action("_default")
_load_action("wolfram")


def int_or_str(text):
    """Helper function for argument parsing."""
    try:
        return int(text)
    except ValueError:
        return text


def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block."""
    if status:
        print(status, file=sys.stderr)
    q.put(bytes(indata))


# First pass: only handle --list-devices so it works without other arguments.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
    '-l', '--list-devices', action='store_true',
    help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
    print(sd.query_devices())
    parser.exit(0)

# Second pass: the full command-line interface.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[parser])
parser.add_argument(
    '-f', '--filename', type=str, metavar='FILENAME',
    help='audio file to store recording to')
parser.add_argument(
    '-m', '--model', type=str, metavar='MODEL_PATH',
    help='Path to the model')
parser.add_argument(
    '-d', '--device', type=int_or_str,
    help='input device (numeric ID or substring)')
parser.add_argument(
    '-r', '--samplerate', type=int, help='sampling rate')
args = parser.parse_args(remaining)

dump_fn = None
try:
    if args.model is None:
        args.model = "model"
    if not os.path.exists(args.model):
        print("Please download a model for your language from https://alphacephei.com/vosk/models")
        print("and unpack as 'model' in the current folder.")
        parser.exit(0)
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, 'input')
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info['default_samplerate'])

    model = vosk.Model(args.model)

    if args.filename:
        dump_fn = open(args.filename, "wb")

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000,
                           device=args.device, dtype='int16', channels=1,
                           callback=callback):
        print('#' * 80)
        print('Press Ctrl+C to stop the recording')
        print('#' * 80)
        _play_sound("ready")

        rec = vosk.KaldiRecognizer(model, args.samplerate)
        listening = False
        while True:
            data = q.get()
            if rec.AcceptWaveform(data):
                # A complete utterance was recognized.
                text = json.loads(rec.Result())["text"]
                otext = text
                # Match and cut the prefix on the same (stripped) string so
                # leading whitespace cannot shift the slice.
                stripped = text.strip()
                used_prefix = _match_prefix(stripped, prefixes)
                # NOTE(review): the source layout was lost; the " xxx " branch
                # is taken to pair with the prefix check -- confirm.
                if used_prefix is not None:
                    text = stripped[len(used_prefix):].strip()
                    if len(text) > 2:
                        _play_sound("stop")
                        sys.stdout.write("\033[K")  # ANSI: erase to end of line
                        print(" >>> " + text)
                        words = text.split(" ")
                        sact = _select_action(words, actions)
                        if sact is not None:
                            try:
                                runaction(sact["action"], text)
                            except Exception as e:
                                print(e)
                                say("Une erreur s'est produite, réessayez à nouveau dans quelques instants.")
                        else:
                            # No trigger matched: try wolfram, then the default.
                            try:
                                runaction("wolfram", text)
                            except Exception as e:
                                print(e)
                                runaction("_default", text)
                elif text:
                    print(" xxx " + text)
                listening = False
            else:
                # Utterance still in progress: show a live, overwritten preview.
                text = json.loads(rec.PartialResult())["partial"]
                stripped = text.strip()
                used_prefix = _match_prefix(stripped, prefixes)
                sys.stdout.write("\033[K")  # ANSI: erase to end of line
                # NOTE(review): the preview is assumed to refresh on every
                # prefixed partial, not only the first one -- confirm.
                if used_prefix is not None:
                    listening = True
                    print("> " + stripped[len(used_prefix):].strip(), end='\r')
                elif stripped:
                    print("(" + text + ")", end='\r')
                else:
                    print("...", end='\r')
            if dump_fn is not None:
                dump_fn.write(data)

except KeyboardInterrupt:
    print('\nDone')
    parser.exit(0)
finally:
    # Flush and close the raw-audio dump even when exiting via SystemExit.
    if dump_fn is not None:
        dump_fn.close()