voice control with whisper

author: Botond Hende <nettingman@gmail.com> 2024-11-20 13:37:51 +0100
committer: Botond Hende <nettingman@gmail.com> 2024-11-20 13:37:51 +0100
commit: 93eb7479e83494690e4eb51f2fb662c651a9fd5e (patch)
tree: e73548e7df148a66590b23c4a42d2c6dbe6db739
parent: 528d65c6bd9954cfd96de57de05498dc4c563e0f (diff)
4 files changed, 84 insertions, 1 deletions
diff --git a/__main__.py b/__main__.py
index 4677b7c..4aa3117 100644
--- a/__main__.py
+++ b/__main__.py
@@ -8,6 +8,7 @@ from .modules.hassil.recognize import recognize
 from .modules.hassil.util import merge_dict
 from .modules.hassil.intents import Intents, TextSlotList
 from .modules.input_handlers.stdin_input import get_input_stdin
+from .modules.input_handlers.pipewire_record import get_input_pw_record
 
 from .modules.intents import *
 
@@ -38,7 +39,7 @@ def main():
 
     intents = Intents.from_dict(input_dict)
 
-    for input_text in get_input_stdin():
+    for input_text in get_input_pw_record():
         result = recognize(input_text, intents, slot_lists=slot_lists)
         if result is not None:
             result_dict = {
diff --git a/modules/input_handlers/pipewire_record.py b/modules/input_handlers/pipewire_record.py
new file mode 100644
index 0000000..8584ad3
--- /dev/null
+++ b/modules/input_handlers/pipewire_record.py
@@ -0,0 +1,66 @@
+import subprocess
+import os.path
+import signal
+import sys
+from time import sleep
+
+import whisper
+
+FIFO_PATH = "/tmp/hestia-listening"
+RECORD_PATH = "/tmp/hestia-record.mp3"
+
+def get_input_pw_record():
+    """Yield one lower-cased Whisper transcript per recording toggled via FIFO_PATH.
+
+    Each time a writer opens the FIFO, recording with pw-record starts; the next
+    open stops it. Exits the process with status 1 if recording fails.
+    """
+    device = get_device()
+
+    if os.path.exists(FIFO_PATH):
+        os.remove(FIFO_PATH)
+    os.mkfifo(FIFO_PATH)
+
+    # Loop-invariant and expensive, so load the model once instead of per utterance.
+    model = whisper.load_model("base")
+    options = whisper.DecodingOptions(language="en", fp16=False)
+
+    while True:
+        with open(FIFO_PATH):  # blocks until a writer opens the FIFO
+            pass
+            # TODO "I'm listening"
+        ps = None
+        try:
+            # No shell: device stays one literal argument and SIGINT reaches pw-record itself.
+            ps = subprocess.Popen(["pw-record", "--target", device, RECORD_PATH])
+            with open(FIFO_PATH):
+                print("finished")
+            ps.send_signal(signal.SIGINT)
+            ps.wait()  # let pw-record finish writing the file before it is read
+            # TODO "acknowledged"
+        except BaseException:  # deliberate: also stop the recorder on Ctrl-C
+            if ps is not None:
+                ps.kill()
+            # TODO "error"; exit gracefully or try to recover
+            sys.exit(1)
+        audio = whisper.pad_or_trim(whisper.load_audio(RECORD_PATH))
+        mel = whisper.log_mel_spectrogram(audio).to(model.device)
+        result_text = whisper.decode(model, mel, options).text.replace(",", "").replace(".", "").lower()
+        print(result_text)
+        yield result_text
+
+def get_device() -> str:
+    """Poll every 3 s until a PipeWire node name containing "mic" exists; return it."""
+    already_warned = False
+    # Constant pipeline, so shell=True is safe; the backslash on grep skips any alias.
+    cmd = 'pw-cli ls | \\grep -Poi "(?<=node.name = \\").*mic.*(?=\\")"'
+    while True:
+        # run() drains and closes the pipe; wait() before read() can deadlock and leaks it.
+        found = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
+        names = found.stdout.decode().strip().splitlines()
+        if found.returncode == 0 and names:
+            return names[0].strip()  # several nodes may match; use the first, not a multi-line blob
+        if not already_warned:
+            already_warned = True
+            # TODO warn about device not found
+        sleep(3)
+\ No newline at end of file
diff --git a/record.sh b/record.sh
new file mode 100755
index 0000000..ce866da
--- /dev/null
+++ b/record.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -eu
+# Signal the running listener through its FIFO, or start the listener if no FIFO exists.
+FIFO_PATH="/tmp/hestia-listening"
+
+if [[ -p "$FIFO_PATH" ]]; then  # true only when the path exists and is a named pipe
+  echo >> "$FIFO_PATH"  # opening the FIFO for writing is what unblocks the reader
+else
+  SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+  MODULE_NAME=$(basename "$SCRIPT_DIR")  # the script's directory name doubles as the module name
+  cd "$SCRIPT_DIR"
+  source venv/bin/activate  # venv is expected inside the script's directory
+  cd ..  # run from the parent directory so "python -m" can resolve the module by name
+  python -m "$MODULE_NAME"
+fi
+
diff --git a/requirements.txt b/requirements.txt
index 80335f8..1294733 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 PyYAML>=6.0
 unicode-rbnf>=1
+openai-whisper
author	Botond Hende <nettingman@gmail.com>	2024-11-20 13:37:51 +0100
committer	Botond Hende <nettingman@gmail.com>	2024-11-20 13:37:51 +0100
commit	93eb7479e83494690e4eb51f2fb662c651a9fd5e (patch)
tree	e73548e7df148a66590b23c4a42d2c6dbe6db739
parent	528d65c6bd9954cfd96de57de05498dc4c563e0f (diff)