Speak AI responses during OpenRouter streaming

2026-03-15 16:28:31 +03:00
parent 4b442795f8
commit 715d7b0ee0
1 changed files with 100 additions and 15 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -9,6 +9,8 @@ import time
 from collections import deque
 from pathlib import Path
 import subprocess
 import queue
 import threading
 # For playing sounds (mp3)
 try:
@@ -715,27 +717,110 @@ def main():
            full_response = ""
            interrupted = False
-            try:
+            # Streaming TTS: read the SSE stream without blocking and speak it sentence by sentence in a separate thread.
-                # Streaming от AI
+            tts_queue: "queue.Queue[str | None]" = queue.Queue()
-                stream_generator = ask_ai_stream(list(chat_history))
+            stop_streaming_event = threading.Event()
-                print("🤖 AI: ", end="", flush=True)
+            def _split_speakable(text: str) -> tuple[str, str]:
                """
                Return (ready_to_speak, remainder).

                We try to speak sentence by sentence without cutting the text
                into pieces that are too small.

                The scan keeps the LAST newline or ``.``/``!``/``?`` found, so
                everything up to the final boundary is returned in one piece.
                When no boundary exists, text of ``hard_flush_chars`` or more
                characters is cut at that fixed length (no word-boundary check);
                shorter text is returned untouched as the remainder.
                """
                if not text:
                    return "", ""
-                for chunk in stream_generator:
+                # Wait for at least a little text so we don't "chirp" one or two words at a time.
-                    full_response += chunk
+                min_chars = 55
-                    print(chunk, end="", flush=True)
+                hard_flush_chars = 220
-            except Exception as e:
+                if len(text) < min_chars and "\n" not in text:
-                print(f"\n❌ Ошибка: {e}")
+                    return "", text
-                speak("Произошла ошибка при получении ответа.")
+
-            else:
+                # Find the sentence boundary.
-                clean_ai_response = clean_response(full_response, language="ru")
+                boundary = -1
-                if clean_ai_response.strip():
+                for i, ch in enumerate(text):
-                    interrupted = not speak(
+                    if ch == "\n":
-                        clean_ai_response,
+                        boundary = i
                    elif ch in ".!?":
                        # Do not split 3.14 and similar cases.
                        prev_is_digit = i > 0 and text[i - 1].isdigit()
                        next_is_digit = i + 1 < len(text) and text[i + 1].isdigit()
                        if ch == "." and prev_is_digit and next_is_digit:
                            continue
                        boundary = i
                # No boundary found: force a cut once the text is long enough,
                # otherwise keep everything as the remainder.
                if boundary == -1:
                    if len(text) >= hard_flush_chars:
                        boundary = hard_flush_chars - 1
                    else:
                        return "", text
                # The boundary character itself belongs to the spoken part.
                speak_part = text[: boundary + 1].strip()
                rest = text[boundary + 1 :].lstrip()
                return speak_part, rest
            def _tts_worker():
                nonlocal interrupted
                while True:
                    item = tts_queue.get()
                    if item is None:
                        return
                    if stop_streaming_event.is_set():
                        continue
                    clean_part = clean_response(item, language="ru")
                    if not clean_part.strip():
                        continue
                    ok = speak(
                        clean_part,
                        check_interrupt=check_wakeword_once,
                        language="ru",
                    )
                    if not ok:
                        interrupted = True
                        stop_streaming_event.set()
                        # Опустошим очередь, чтобы не озвучивать "хвост" после прерывания.
                        try:
                            while True:
                                tts_queue.get_nowait()
                        except queue.Empty:
                            pass
                        return
            # Start the speech worker before streaming so that finished
            # sentences can be spoken while later chunks are still arriving.
            tts_thread = threading.Thread(target=_tts_worker, daemon=True)
            tts_thread.start()

            print("🤖 AI: ", end="", flush=True)
            # Bound before the ``try`` so the ``finally`` clause can always read
            # it, even when creating the stream itself raises; otherwise the
            # cleanup would fail and the worker would never get its sentinel.
            buffer = ""
            try:
                stream_generator = ask_ai_stream(list(chat_history))
                for chunk in stream_generator:
                    # Set by the worker once speech has been interrupted.
                    if stop_streaming_event.is_set():
                        break
                    if not chunk:
                        continue
                    full_response += chunk
                    buffer += chunk
                    print(chunk, end="", flush=True)
                    # Queue every piece that is ready to be spoken; the
                    # unfinished remainder stays in the buffer.
                    while True:
                        speak_part, buffer = _split_speakable(buffer)
                        if not speak_part:
                            break
                        tts_queue.put(speak_part)
            except Exception as e:
                print(f"\n❌ Ошибка: {e}")
                tts_queue.put("Произошла ошибка при получении ответа.")
            finally:
                # Speak the remaining tail unless speech was interrupted.
                if not stop_streaming_event.is_set():
                    tail = buffer.strip()
                    if tail:
                        tts_queue.put(tail)
                # ``None`` is the sentinel that makes the worker exit.
                tts_queue.put(None)
                # Bounded wait: after 20 s control returns even if the worker
                # is still speaking (it is a daemon thread and keeps running).
                # NOTE(review): ``interrupted`` may still change after a timeout.
                tts_thread.join(timeout=20)
            print()