From 93eb7479e83494690e4eb51f2fb662c651a9fd5e Mon Sep 17 00:00:00 2001
From: Botond Hende
Date: Wed, 20 Nov 2024 13:37:51 +0100
Subject: voice control with whisper

---
 __main__.py                               |   3 +-
 modules/input_handlers/pipewire_record.py | 116 ++++++++++++++++++++++++++++++
 record.sh                                 |  15 ++++
 requirements.txt                          |   1 +
 4 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 modules/input_handlers/pipewire_record.py
 create mode 100755 record.sh

diff --git a/__main__.py b/__main__.py
index 4677b7c..4aa3117 100644
--- a/__main__.py
+++ b/__main__.py
@@ -8,6 +8,7 @@ from .modules.hassil.recognize import recognize
 from .modules.hassil.util import merge_dict
 from .modules.hassil.intents import Intents, TextSlotList
 from .modules.input_handlers.stdin_input import get_input_stdin
+from .modules.input_handlers.pipewire_record import get_input_pw_record
 from .modules.intents import *
 
 
@@ -38,7 +39,7 @@ def main():
 
     intents = Intents.from_dict(input_dict)
 
-    for input_text in get_input_stdin():
+    for input_text in get_input_pw_record():
         result = recognize(input_text, intents, slot_lists=slot_lists)
         if result is not None:
             result_dict = {
diff --git a/modules/input_handlers/pipewire_record.py b/modules/input_handlers/pipewire_record.py
new file mode 100644
--- /dev/null
+++ b/modules/input_handlers/pipewire_record.py
@@ -0,0 +1,116 @@
+"""Voice input: record from a PipeWire microphone and transcribe with Whisper.
+
+Recording is driven through the FIFO at FIFO_PATH: every time a writer opens
+it (see record.sh) the handler advances one step, first starting a recording
+and then stopping and transcribing it.
+"""
+
+import re
+import subprocess
+import os.path
+import signal
+import sys
+from time import sleep
+
+import whisper
+
+FIFO_PATH = "/tmp/hestia-listening"
+RECORD_PATH = "/tmp/hestia-record.mp3"
+
+# Matches the quoted value of a `node.name = "..."` line that contains "mic"
+# (case-insensitive), i.e. the name of a microphone node in `pw-cli ls` output.
+_MIC_NODE_RE = re.compile(r'(?<=node.name = ").*mic.*(?=")', re.IGNORECASE)
+
+
+def get_input_pw_record():
+    """Yield the normalised transcription of each recorded utterance.
+
+    The FIFO at FIFO_PATH is (re)created on start. Each loop iteration blocks
+    until a writer opens the FIFO, records from the microphone with pw-record
+    until a writer opens it again, then decodes the recording with Whisper.
+    The yielded text is lower-cased with commas and full stops removed.
+
+    Exits the process with status 1 if the recording step fails.
+    """
+    device = get_device()
+
+    if os.path.exists(FIFO_PATH):
+        os.remove(FIFO_PATH)
+
+    os.mkfifo(FIFO_PATH)
+
+    # Loading the model is slow; do it once rather than after every recording.
+    model = whisper.load_model("base")
+    options = whisper.DecodingOptions(language="en", fp16=False)
+
+    while True:
+        # Opening a FIFO for reading blocks until some process opens it for
+        # writing, so this is the "start recording" trigger.
+        with open(FIFO_PATH):
+            pass
+        # TODO "I'm listening"
+
+        recorder = None
+        try:
+            # Argument list, no shell: the device name is passed verbatim and
+            # SIGINT reaches pw-record itself instead of an intermediate shell.
+            recorder = subprocess.Popen(
+                ["pw-record", "--target", device, RECORD_PATH]
+            )
+            with open(FIFO_PATH):
+                print("finished")
+            recorder.send_signal(signal.SIGINT)
+            # Wait for pw-record to exit so the file is complete before it is
+            # read back, and so no zombie process is left behind.
+            recorder.wait()
+            # TODO "acknowledged"
+        except (OSError, subprocess.SubprocessError):
+            # TODO "error"
+            # TODO exit gracefully or try to recover
+            sys.exit(1)
+        finally:
+            # Never leave a recorder running, whatever interrupted us.
+            if recorder is not None and recorder.poll() is None:
+                recorder.kill()
+                recorder.wait()
+
+        audio = whisper.load_audio(RECORD_PATH)
+        # Whisper decodes fixed 30-second windows; longer input is truncated.
+        audio = whisper.pad_or_trim(audio)
+
+        mel = whisper.log_mel_spectrogram(audio).to(model.device)
+        result = whisper.decode(model, mel, options)
+        result_text = result.text.replace(",", "").replace(".", "").lower()
+
+        print(result_text)
+
+        yield result_text
+
+
+def get_device() -> str:
+    """Return the name of the first PipeWire node whose name contains "mic".
+
+    Polls `pw-cli ls` every 3 seconds until a matching node appears.
+    """
+    already_warned = False
+
+    while True:
+        try:
+            listing = subprocess.run(
+                ["pw-cli", "ls"], stdout=subprocess.PIPE, check=False
+            ).stdout.decode(errors="replace")
+        except OSError:
+            # pw-cli is missing or not runnable: same as "no device yet".
+            listing = ""
+
+        # Only the first match is used: a multi-line string of several node
+        # names would not be a valid pw-record target.
+        found = _MIC_NODE_RE.search(listing)
+        if found:
+            return found.group(0).strip()
+
+        if not already_warned:
+            already_warned = True
+            # TODO warn about device not found
+
+        sleep(3)
diff --git a/record.sh b/record.sh
new file mode 100755
index 0000000..ce866da
--- /dev/null
+++ b/record.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -eu
+
+FIFO_PATH="/tmp/hestia-listening"
+
+if [[ -p "$FIFO_PATH" ]]; then
+    echo >> "$FIFO_PATH"
+else
+    SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+    MODULE_NAME=$(basename "$SCRIPT_DIR")
+    cd "$SCRIPT_DIR"
+    source venv/bin/activate
+    cd ..
+    python -m "$MODULE_NAME"
+fi
+
diff --git a/requirements.txt b/requirements.txt
index 80335f8..1294733 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 PyYAML>=6.0
 unicode-rbnf>=1
+openai-whisper
-- 
cgit v1.2.3-70-g09d2