translator но без озвучивания слов на английском

This commit is contained in:
2026-01-09 01:01:27 +03:00
parent 53809c03f4
commit 242ead5355
11 changed files with 845 additions and 238 deletions

62
stt.py
View File

@@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API.
Recognizes speech from microphone using streaming WebSocket.
Supports Russian (default) and English.
"""
import os
import asyncio
import threading
@@ -20,6 +21,7 @@ from deepgram import (
# Configure logging to suppress debug noise
logging.getLogger("deepgram").setLevel(logging.WARNING)
class SpeechRecognizer:
"""Speech recognizer using Deepgram streaming."""
@@ -29,18 +31,18 @@ class SpeechRecognizer:
self.stream = None
self.transcript = ""
self.lock = threading.Lock()
def initialize(self):
    """Create the Deepgram client and the PyAudio instance and store them on self.

    Sets ``self.dg_client`` and ``self.pa``.

    Raises:
        ValueError: If ``DEEPGRAM_API_KEY`` is empty or otherwise falsy.
    """
    # Fail fast before constructing anything when no API key is configured.
    if not DEEPGRAM_API_KEY:
        raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.")

    print("📦 Инициализация Deepgram STT...")

    # NOTE(review): the logging level is passed as the SDK's `verbose` option,
    # presumably to keep SDK output at WARNING and above; confirm against the SDK.
    client_options = DeepgramClientOptions(verbose=logging.WARNING)
    self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, client_options)
    self.pa = pyaudio.PyAudio()

    print("✅ Deepgram клиент готов")
@@ -59,13 +61,14 @@ class SpeechRecognizer:
async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
"""Async loop to send audio and wait for results."""
self.transcript = ""
transcript_parts = []
loop = asyncio.get_running_loop()
stream = self._get_stream()
stop_event = asyncio.Event()
speech_started_event = asyncio.Event()
# We need access to the outer 'self' (SpeechRecognizer instance)
speech_recognizer_self = self
@@ -74,9 +77,11 @@ class SpeechRecognizer:
if len(sentence) == 0:
return
if result.is_final:
print(f"📝 Частичный результат: {sentence}")
with speech_recognizer_self.lock:
speech_recognizer_self.transcript = sentence
transcript_parts.append(sentence)
speech_recognizer_self.transcript = " ".join(
transcript_parts
).strip()
def on_speech_started(unused_self, speech_started, **kwargs):
loop.call_soon_threadsafe(speech_started_event.set)
@@ -102,7 +107,7 @@ class SpeechRecognizer:
channels=1,
sample_rate=SAMPLE_RATE,
interim_results=True,
utterance_end_ms="1200",
utterance_end_ms=1200,
vad_events=True,
)
@@ -133,38 +138,45 @@ class SpeechRecognizer:
print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}")
sender_task = asyncio.create_task(send_audio())
try:
# 1. Wait for speech to start (detection_timeout)
if detection_timeout:
try:
await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout)
await asyncio.wait_for(
speech_started_event.wait(), timeout=detection_timeout
)
except asyncio.TimeoutError:
# print("Detection timeout - no speech")
stop_event.set()
# 2. If started (or no detection timeout), wait for completion
if not stop_event.is_set():
await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds)
except asyncio.TimeoutError:
# print("Global timeout")
pass
stop_event.set()
await sender_task
# Finish is synchronous
dg_connection.finish()
return self.transcript
def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
def listen(
self,
timeout_seconds: float = 7.0,
detection_timeout: float = None,
lang: str = "ru",
) -> str:
"""
Listen to microphone and transcribe speech.
"""
if not self.dg_client:
self.initialize()
self.current_lang = lang
print(f"🎙️ Слушаю ({lang})...")
@@ -172,16 +184,18 @@ class SpeechRecognizer:
dg_connection = self.dg_client.listen.live.v("1")
try:
transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout))
transcript = asyncio.run(
self._process_audio(dg_connection, timeout_seconds, detection_timeout)
)
final_text = transcript.strip() if transcript else ""
if final_text:
print(f"📝 Распознано: {final_text}")
else:
print("⚠️ Речь не распознана")
return final_text
except Exception as e:
print(f"❌ Ошибка STT: {e}")
return ""
@@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer:
return _recognizer
def listen(
    timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru"
) -> str:
    """Listen to the microphone and return the transcribed text.

    Thin module-level wrapper: forwards all three arguments positionally to
    ``get_recognizer().listen``.

    Args:
        timeout_seconds: Forwarded unchanged to the recognizer; 7.0 by default.
        detection_timeout: Forwarded unchanged to the recognizer; ``None`` by
            default.
        lang: Language code forwarded to the recognizer; ``"ru"`` by default.

    Returns:
        Whatever the recognizer's ``listen`` returns (annotated as ``str``).
    """
    return get_recognizer().listen(timeout_seconds, detection_timeout, lang)
@@ -218,4 +234,4 @@ def cleanup():
global _recognizer
if _recognizer:
_recognizer.cleanup()
_recognizer = None
_recognizer = None