Speak AI responses during OpenRouter streaming

2026-03-15 16:28:31 +03:00
parent 4b442795f8
commit 715d7b0ee0
1 changed files with 100 additions and 15 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -9,6 +9,8 @@ import time
 from collections import deque
 from pathlib import Path
 import subprocess
+import queue
+import threading

 # Для воспроизведения звуков (mp3)
 try:
@@ -715,27 +717,110 @@ def main():
            full_response = ""
            interrupted = False

-            try:
-                # Streaming от AI
-                stream_generator = ask_ai_stream(list(chat_history))
+            # Streaming TTS: читаем SSE без блокировок, а озвучиваем в отдельном потоке по предложениям.
+            tts_queue: "queue.Queue[str | None]" = queue.Queue()
+            stop_streaming_event = threading.Event()

-                print("🤖 AI: ", end="", flush=True)
+            def _split_speakable(text: str) -> tuple[str, str]:
+                """
+                Возвращает (готовое_для_озвучивания, остаток).
+                Стараемся говорить по предложениям, но не режем слишком мелко.
+                """
+                if not text:
+                    return "", ""

-                for chunk in stream_generator:
-                    full_response += chunk
-                    print(chunk, end="", flush=True)
+                # Ждем хотя бы немного текста, чтобы не "пиликать" по 1-2 словам.
+                min_chars = 55
+                hard_flush_chars = 220

-            except Exception as e:
-                print(f"\n❌ Ошибка: {e}")
-                speak("Произошла ошибка при получении ответа.")
+                if len(text) < min_chars and "\n" not in text:
+                    return "", text
+
+                # Находим границу предложения.
+                boundary = -1
+                for i, ch in enumerate(text):
+                    if ch == "\n":
+                        boundary = i
+                    elif ch in ".!?":
+                        # Не режем 3.14 и похожие случаи.
+                        prev_is_digit = i > 0 and text[i - 1].isdigit()
+                        next_is_digit = i + 1 < len(text) and text[i + 1].isdigit()
+                        if ch == "." and prev_is_digit and next_is_digit:
+                            continue
+                        boundary = i
+
+                if boundary == -1:
+                    if len(text) >= hard_flush_chars:
+                        boundary = hard_flush_chars - 1
                    else:
-                clean_ai_response = clean_response(full_response, language="ru")
-                if clean_ai_response.strip():
-                    interrupted = not speak(
-                        clean_ai_response,
+                        return "", text
+
+                speak_part = text[: boundary + 1].strip()
+                rest = text[boundary + 1 :].lstrip()
+                return speak_part, rest
+
+            def _tts_worker():
+                nonlocal interrupted
+                while True:
+                    item = tts_queue.get()
+                    if item is None:
+                        return
+                    if stop_streaming_event.is_set():
+                        continue
+
+                    clean_part = clean_response(item, language="ru")
+                    if not clean_part.strip():
+                        continue
+
+                    ok = speak(
+                        clean_part,
                        check_interrupt=check_wakeword_once,
                        language="ru",
                    )
+                    if not ok:
+                        interrupted = True
+                        stop_streaming_event.set()
+                        # Опустошим очередь, чтобы не озвучивать "хвост" после прерывания.
+                        try:
+                            while True:
+                                tts_queue.get_nowait()
+                        except queue.Empty:
+                            pass
+                        return
+
+            tts_thread = threading.Thread(target=_tts_worker, daemon=True)
+            tts_thread.start()
+
+            print("🤖 AI: ", end="", flush=True)
+
+            try:
+                stream_generator = ask_ai_stream(list(chat_history))
+                buffer = ""
+                for chunk in stream_generator:
+                    if stop_streaming_event.is_set():
+                        break
+                    if not chunk:
+                        continue
+                    full_response += chunk
+                    buffer += chunk
+                    print(chunk, end="", flush=True)
+
+                    while True:
+                        speak_part, buffer = _split_speakable(buffer)
+                        if not speak_part:
+                            break
+                        tts_queue.put(speak_part)
+            except Exception as e:
+                print(f"\n❌ Ошибка: {e}")
+                tts_queue.put("Произошла ошибка при получении ответа.")
+            finally:
+                # Договорим остаток, если не было прерывания.
+                if not stop_streaming_event.is_set():
+                    tail = buffer.strip()
+                    if tail:
+                        tts_queue.put(tail)
+                tts_queue.put(None)
+                tts_thread.join(timeout=20)

            print()