translator но без озвучивания слов на английском

2026-01-09 01:01:27 +03:00
parent 53809c03f4
commit 242ead5355
11 changed files with 845 additions and 238 deletions
--- a/stt.py
+++ b/stt.py
@@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API.
 Recognizes speech from microphone using streaming WebSocket.
 Supports Russian (default) and English.
 """
+
 import os
 import asyncio
 import threading
@@ -20,6 +21,7 @@ from deepgram import (
 # Configure logging to suppress debug noise
 logging.getLogger("deepgram").setLevel(logging.WARNING)

+
 class SpeechRecognizer:
    """Speech recognizer using Deepgram streaming."""

@@ -29,18 +31,18 @@ class SpeechRecognizer:
        self.stream = None
        self.transcript = ""
        self.lock = threading.Lock()
-        
+
    def initialize(self):
        """Initialize Deepgram client and PyAudio."""
        if not DEEPGRAM_API_KEY:
            raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.")
-            
+
        print("📦 Инициализация Deepgram STT...")
        config = DeepgramClientOptions(
            verbose=logging.WARNING,
        )
        self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, config)
-        
+
        self.pa = pyaudio.PyAudio()
        print("✅ Deepgram клиент готов")

@@ -59,13 +61,14 @@ class SpeechRecognizer:
    async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
        """Async loop to send audio and wait for results."""
        self.transcript = ""
-        
+        transcript_parts = []
+
        loop = asyncio.get_running_loop()
        stream = self._get_stream()
-        
+
        stop_event = asyncio.Event()
        speech_started_event = asyncio.Event()
-        
+
        # We need access to the outer 'self' (SpeechRecognizer instance)
        speech_recognizer_self = self

@@ -74,9 +77,11 @@ class SpeechRecognizer:
            if len(sentence) == 0:
                return
            if result.is_final:
-                print(f"📝 Частичный результат: {sentence}")
                with speech_recognizer_self.lock:
-                    speech_recognizer_self.transcript = sentence
+                    transcript_parts.append(sentence)
+                    speech_recognizer_self.transcript = " ".join(
+                        transcript_parts
+                    ).strip()

        def on_speech_started(unused_self, speech_started, **kwargs):
            loop.call_soon_threadsafe(speech_started_event.set)
@@ -102,7 +107,7 @@ class SpeechRecognizer:
            channels=1,
            sample_rate=SAMPLE_RATE,
            interim_results=True,
-            utterance_end_ms="1200", 
+            utterance_end_ms=1200,
            vad_events=True,
        )

@@ -133,38 +138,45 @@ class SpeechRecognizer:
                print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}")

        sender_task = asyncio.create_task(send_audio())
-        
+
        try:
            # 1. Wait for speech to start (detection_timeout)
            if detection_timeout:
                try:
-                    await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout)
+                    await asyncio.wait_for(
+                        speech_started_event.wait(), timeout=detection_timeout
+                    )
                except asyncio.TimeoutError:
                    # print("Detection timeout - no speech")
                    stop_event.set()
-            
+
            # 2. If started (or no detection timeout), wait for completion
            if not stop_event.is_set():
                await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds)
-                
+
        except asyncio.TimeoutError:
            # print("Global timeout")
            pass
-            
+
        stop_event.set()
        await sender_task
        # Finish is synchronous
        dg_connection.finish()
-        
+
        return self.transcript

-    def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
+    def listen(
+        self,
+        timeout_seconds: float = 7.0,
+        detection_timeout: float = None,
+        lang: str = "ru",
+    ) -> str:
        """
        Listen to microphone and transcribe speech.
        """
        if not self.dg_client:
            self.initialize()
-            
+
        self.current_lang = lang
        print(f"🎙️ Слушаю ({lang})...")

@@ -172,16 +184,18 @@ class SpeechRecognizer:
        dg_connection = self.dg_client.listen.live.v("1")

        try:
-            transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout))
-            
+            transcript = asyncio.run(
+                self._process_audio(dg_connection, timeout_seconds, detection_timeout)
+            )
+
            final_text = transcript.strip() if transcript else ""
            if final_text:
                print(f"📝 Распознано: {final_text}")
            else:
                print("⚠️ Речь не распознана")
-                
+
            return final_text
-            
+
        except Exception as e:
            print(f"❌ Ошибка STT: {e}")
            return ""
@@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer:
    return _recognizer


-def listen(timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
+def listen(
+    timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru"
+) -> str:
    """Listen to microphone and return transcribed text."""
    return get_recognizer().listen(timeout_seconds, detection_timeout, lang)

@@ -218,4 +234,4 @@ def cleanup():
    global _recognizer
    if _recognizer:
        _recognizer.cleanup()
-        _recognizer = None
+        _recognizer = None