summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Botond Hende <nettingman@gmail.com> 2024-11-20 13:37:51 +0100
committer Botond Hende <nettingman@gmail.com> 2024-11-20 13:37:51 +0100
commit 93eb7479e83494690e4eb51f2fb662c651a9fd5e (patch)
tree e73548e7df148a66590b23c4a42d2c6dbe6db739
parent 528d65c6bd9954cfd96de57de05498dc4c563e0f (diff)
voice control with whisper
-rw-r--r-- __main__.py 3
-rw-r--r-- modules/input_handlers/pipewire_record.py 66
-rwxr-xr-x record.sh 15
-rw-r--r-- requirements.txt 1
4 files changed, 84 insertions, 1 deletions
diff --git a/__main__.py b/__main__.py
index 4677b7c..4aa3117 100644
--- a/__main__.py
+++ b/__main__.py
@@ -8,6 +8,7 @@ from .modules.hassil.recognize import recognize
from .modules.hassil.util import merge_dict
from .modules.hassil.intents import Intents, TextSlotList
from .modules.input_handlers.stdin_input import get_input_stdin
+from .modules.input_handlers.pipewire_record import get_input_pw_record
from .modules.intents import *
@@ -38,7 +39,7 @@ def main():
intents = Intents.from_dict(input_dict)
- for input_text in get_input_stdin():
+ for input_text in get_input_pw_record():
result = recognize(input_text, intents, slot_lists=slot_lists)
if result is not None:
result_dict = {
diff --git a/modules/input_handlers/pipewire_record.py b/modules/input_handlers/pipewire_record.py
new file mode 100644
index 0000000..8584ad3
--- /dev/null
+++ b/modules/input_handlers/pipewire_record.py
@@ -0,0 +1,66 @@
+import subprocess
+import os.path
+import signal
+import sys
+from time import sleep
+
+import whisper
+
+FIFO_PATH = "/tmp/hestia-listening"
+RECORD_PATH = "/tmp/hestia-record.mp3"
+
def _wait_for_trigger():
    """Block until a writer opens FIFO_PATH, then drain whatever it wrote."""
    # Opening a FIFO read-only blocks until some process opens it for writing.
    # Reading to EOF keeps our end open until that writer has closed again, so
    # the writer (e.g. ``echo >> FIFO``) is not hit by a broken pipe.
    with open(FIFO_PATH) as fifo:
        fifo.read()


def get_input_pw_record():
    """Yield one transcribed utterance per start/stop trigger pair on the FIFO.

    A fresh FIFO is created at FIFO_PATH.  The first write to it starts a
    ``pw-record`` capture from the device returned by ``get_device()`` into
    RECORD_PATH; the next write stops the capture.  The recording is then
    decoded with Whisper and the text is yielded lower-cased, with commas and
    full stops removed.  The generator never finishes on its own.

    Raises:
        SystemExit: if the recorder cannot be started or stopped cleanly.
    """
    device = get_device()

    # Always start from a FIFO we created ourselves; a leftover path from a
    # previous run is discarded.
    try:
        os.remove(FIFO_PATH)
    except FileNotFoundError:
        pass
    os.mkfifo(FIFO_PATH)

    # Loading the model and building the options do not depend on the
    # recording, so do them once instead of once per utterance.
    model = whisper.load_model("base")
    options = whisper.DecodingOptions(language="en", fp16=False)

    stop_timeout = 5  # seconds pw-record gets to finish writing after SIGINT

    while True:
        _wait_for_trigger()
        # TODO "I'm listening"

        ps = None
        try:
            # Argument list instead of a shell string: the device name is not
            # re-parsed by a shell and SIGINT reaches pw-record directly.
            ps = subprocess.Popen(["pw-record", "--target", device, RECORD_PATH])
            _wait_for_trigger()
            print("finished")
            ps.send_signal(signal.SIGINT)
            # Wait for the recorder to exit so the file is complete before it
            # is read back below.
            ps.wait(timeout=stop_timeout)
            # TODO "acknowledged"
        except Exception as err:
            # TODO "error"
            # TODO exit gracefully or try to recover
            sys.exit(f"pw-record failed: {err}")
        finally:
            # Never leave a recorder running behind us, whatever went wrong
            # (including Ctrl-C while recording).
            if ps is not None and ps.poll() is None:
                ps.kill()
                ps.wait()

        audio = whisper.load_audio(RECORD_PATH)
        # NOTE(review): per the Whisper README, pad_or_trim fits the audio to
        # the model's fixed 30 s window, so longer recordings are cut - confirm
        # this is acceptable for voice commands.
        audio = whisper.pad_or_trim(audio)

        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        result = whisper.decode(model, mel, options)
        result_text = result.text.replace(",", "").replace(".", "").lower()

        print(result_text)

        yield result_text
+
def get_device() -> str:
    """Return the PipeWire node name of a microphone, waiting until one exists.

    Runs ``pw-cli ls`` and extracts the ``node.name`` values that contain
    "mic" (case-insensitive).  When nothing matches, the lookup is retried
    every 3 seconds, indefinitely.

    Returns:
        The first matching node name, stripped of surrounding whitespace.
    """
    # Constant command, no external input is interpolated.  ``\grep`` bypasses
    # any shell alias; the pipeline's exit status is grep's, i.e. 0 only when
    # at least one node name matched.
    command = 'pw-cli ls | \\grep -Poi "(?<=node.name = \\").*mic.*(?=\\")"'
    already_warned = False

    while True:
        # run() drains stdout while waiting and closes the pipe afterwards;
        # wait() on a Popen with stdout=PIPE can deadlock on large output and
        # leaves the pipe open on every retry.
        completed = subprocess.run(command, shell=True, stdout=subprocess.PIPE)

        if completed.returncode == 0:
            # grep prints one match per line; several microphones would
            # otherwise yield a multi-line "name" that is not a valid target.
            names = [
                line.strip()
                for line in completed.stdout.decode().splitlines()
                if line.strip()
            ]
            if names:
                return names[0]

        if not already_warned:
            already_warned = True
            # TODO warn about device not found

        sleep(3)
diff --git a/record.sh b/record.sh
new file mode 100755
index 0000000..ce866da
--- /dev/null
+++ b/record.sh
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Push-to-talk trigger for the voice assistant.
#
# If the assistant's FIFO already exists, poke it: each write toggles the
# recording (first write starts it, the next one stops it).  Otherwise start
# the assistant itself from this checkout's virtualenv.

# Set the options here rather than on the shebang line, where they are
# silently dropped when the script is run as `bash record.sh`.
set -eu

FIFO_PATH="/tmp/hestia-listening"

if [[ -p "$FIFO_PATH" ]]; then
    # NOTE: opening a FIFO for writing blocks until a reader has it open, so
    # this hangs if the FIFO is a leftover from a process that is gone.
    echo >> "$FIFO_PATH"
else
    # The package is run as a module, named after the directory that holds
    # this script, from that directory's parent.
    SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
    MODULE_NAME=$(basename "$SCRIPT_DIR")
    cd "$SCRIPT_DIR"
    source venv/bin/activate
    cd ..
    python -m "$MODULE_NAME"
fi
+
diff --git a/requirements.txt b/requirements.txt
index 80335f8..1294733 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
PyYAML>=6.0
unicode-rbnf>=1
+openai-whisper