""" Speech-to-Text module using Vosk. Recognizes Russian speech from microphone. """ import json import pyaudio from vosk import Model, KaldiRecognizer from config import VOSK_MODEL_PATH, SAMPLE_RATE class SpeechRecognizer: """Speech recognizer using Vosk.""" def __init__(self): self.model = None self.recognizer = None self.pa = None self.stream = None def initialize(self): """Initialize Vosk model and audio stream.""" print("📦 Загрузка модели Vosk...") self.model = Model(str(VOSK_MODEL_PATH)) self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE) self.recognizer.SetWords(True) self.pa = pyaudio.PyAudio() self.stream = self.pa.open( rate=SAMPLE_RATE, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=4096 ) print("✅ Модель Vosk загружена") def listen(self, timeout_seconds: float = 5.0) -> str: """ Listen to microphone and transcribe speech. Args: timeout_seconds: Maximum time to listen for speech Returns: Transcribed text from speech """ if not self.model: self.initialize() print("🎙️ Слушаю... (говорите)") # Reset recognizer for new recognition self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE) frames_to_read = int(SAMPLE_RATE * timeout_seconds / 4096) silence_frames = 0 max_silence_frames = 10 # About 2.5 seconds of silence for _ in range(frames_to_read): data = self.stream.read(4096, exception_on_overflow=False) if self.recognizer.AcceptWaveform(data): result = json.loads(self.recognizer.Result()) text = result.get("text", "").strip() if text: print(f"📝 Распознано: {text}") return text silence_frames += 1 else: # Check partial result partial = json.loads(self.recognizer.PartialResult()) if partial.get("partial", ""): silence_frames = 0 else: silence_frames += 1 # Stop if too much silence after speech if silence_frames > max_silence_frames: break # Get final result result = json.loads(self.recognizer.FinalResult()) text = result.get("text", "").strip() if text: print(f"📝 Распознано: {text}") else: print("⚠️ Речь не распознана") return text def cleanup(self): """Release resources.""" if self.stream: self.stream.close() if self.pa: self.pa.terminate() # Global instance _recognizer = None def get_recognizer() -> SpeechRecognizer: """Get or create speech recognizer instance.""" global _recognizer if _recognizer is None: _recognizer = SpeechRecognizer() return _recognizer def listen(timeout_seconds: float = 5.0) -> str: """Listen to microphone and return transcribed text.""" return get_recognizer().listen(timeout_seconds) def cleanup(): """Cleanup recognizer resources.""" global _recognizer if _recognizer: _recognizer.cleanup() _recognizer = None