""" Local offline Speech-to-Text module using Vosk. Used for simple command detection (like "stop") without internet. """ import os import sys import json import pyaudio from vosk import Model, KaldiRecognizer from config import VOSK_MODEL_PATH, SAMPLE_RATE class LocalRecognizer: def __init__(self): self.model = None self.rec = None self.pa = None self.stream = None def initialize(self): if not os.path.exists(VOSK_MODEL_PATH): print(f"❌ Ошибка: Vosk модель не найдена по пути {VOSK_MODEL_PATH}") return False print("📦 Инициализация локального STT (Vosk)...") # Redirect stderr to suppress Vosk logs try: null_fd = os.open(os.devnull, os.O_WRONLY) old_stderr = os.dup(2) sys.stderr.flush() os.dup2(null_fd, 2) os.close(null_fd) self.model = Model(str(VOSK_MODEL_PATH)) # Restore stderr os.dup2(old_stderr, 2) os.close(old_stderr) except Exception as e: print(f"Error initializing Vosk: {e}") return False self.rec = KaldiRecognizer(self.model, SAMPLE_RATE) self.pa = pyaudio.PyAudio() return True def listen_for_keywords(self, keywords: list, timeout: float = 10.0) -> str: """ Listen for specific keywords locally. Returns the recognized keyword if found, or empty string. """ if not self.model: if not self.initialize(): return "" # Open stream try: stream = self.pa.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE, input=True, frames_per_buffer=4096) stream.start_stream() except Exception as e: print(f"❌ Ошибка микрофона: {e}") return "" import time start_time = time.time() print(f"👂 Локальное слушание ожидает: {keywords}") detected_text = "" try: while time.time() - start_time < timeout: data = stream.read(4096, exception_on_overflow=False) if self.rec.AcceptWaveform(data): res = json.loads(self.rec.Result()) text = res.get("text", "") if text: print(f"📝 Локально: {text}") # Check against keywords for kw in keywords: if kw in text: detected_text = text break else: # Partial result res = json.loads(self.rec.PartialResult()) partial = res.get("partial", "") if partial: for kw in keywords: if kw in partial: detected_text = partial break if detected_text: break finally: stream.stop_stream() stream.close() return detected_text def cleanup(self): if self.pa: self.pa.terminate() # Global instance _local_recognizer = None def get_local_recognizer(): global _local_recognizer if _local_recognizer is None: _local_recognizer = LocalRecognizer() return _local_recognizer def listen_for_keywords(keywords: list, timeout: float = 5.0) -> str: """Listen for keywords using Vosk.""" return get_local_recognizer().listen_for_keywords(keywords, timeout)