From 6add70fcd2de87b221797e443086692e27e9658c Mon Sep 17 00:00:00 2001
From: future <nearkaido@gmail.com>
Date: Sun, 15 Mar 2026 16:22:00 +0300
Subject: [PATCH] Fix TTS time phrases and STT cleanup

---
 .gitignore          |  3 ++
 app/audio/stt.py    | 77 +++++++++++++++++++++++++++++++++++++++++++--
 app/core/cleaner.py | 67 +++++++++++++++++++++++++++++++++++++++
 app/main.py         |  1 +
 4 files changed, 146 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7749cc9..52d117e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,6 +46,9 @@ vosk-model-*/
 # VS Code
 .vscode/
 
+# Runtime state
+data/music_state.json
+
 
 .beads
 .gitattributes
diff --git a/app/audio/stt.py b/app/audio/stt.py
index 8ee2e43..1c4483b 100644
--- a/app/audio/stt.py
+++ b/app/audio/stt.py
@@ -234,6 +234,33 @@ class SpeechRecognizer:
 
         return True
 
+    def _run_blocking_cleanup_sync(self, func, timeout_seconds: float, label: str) -> bool:
+        """Sync counterpart of _run_blocking_cleanup() for use in listen(): run func() in a helper thread, wait up to timeout_seconds, and return True only if it finished without raising."""
+        done_event = threading.Event()
+        error_holder = {}  # runner() stores the exception under "error" if func() raises
+
+        def runner():
+            try:
+                func()
+            except Exception as exc:
+                error_holder["error"] = exc
+            finally:
+                done_event.set()  # signal completion whether func() succeeded or raised
+
+        thread = threading.Thread(target=runner, daemon=True, name=label)  # never joined; on timeout it is left running
+        thread.start()
+
+        done_event.wait(timeout=max(0.0, float(timeout_seconds)))  # negative timeouts are clamped to 0
+        if not done_event.is_set():
+            print(f"⚠️ {label} timed out; continuing cleanup.")
+            return False
+
+        error = error_holder.get("error")
+        if error is not None:
+            print(f"⚠️ {label} failed: {error}")
+            return False
+        return True
+
     async def _process_audio(
         self, dg_connection, timeout_seconds, detection_timeout, fast_stop
     ):
@@ -334,6 +361,7 @@ class SpeechRecognizer:
 
         # --- Задача отправки аудио с буферизацией ---
         sender_stop_event = threading.Event()
+        stream_holder = {"stream": None}
 
         def request_stop():
             stop_event.set()
@@ -346,6 +374,7 @@ class SpeechRecognizer:
 
             try:
                 stream, stream_sample_rate = self._open_stream_for_session()
+                stream_holder["stream"] = stream
                 options = LiveOptions(
                     model="nova-2",  # Самая быстрая и точная модель
                     language=self.current_lang,
@@ -465,6 +494,7 @@ class SpeechRecognizer:
                 with contextlib.suppress(Exception):
                     if stream:
                         stream.close()
+                stream_holder["stream"] = None
                 print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}")
 
         sender_thread = threading.Thread(
@@ -544,23 +574,56 @@ class SpeechRecognizer:
             sender_thread,
             timeout_seconds=max(SENDER_STOP_WAIT_SECONDS, SENDER_FORCE_RELEASE_WAIT_SECONDS),
         )
+        cleanup_unhealthy = False
         if not sender_stopped:
             print("⚠️ Audio sender shutdown timed out; continuing cleanup.")
+            cleanup_unhealthy = True
+
+            def force_close_stream():  # Best-effort stop + close of the stream published in stream_holder.
+                stream = stream_holder.get("stream")
+                if not stream:
+                    return  # nothing published (or already cleared), so nothing to close
+                with contextlib.suppress(Exception):  # errors while stopping are ignored
+                    if stream.is_active():
+                        stream.stop_stream()
+                with contextlib.suppress(Exception):  # close() is attempted even if stopping failed
+                    stream.close()
+                stream_holder["stream"] = None  # clear the slot so the same stream is not closed again from here
+
+            await self._run_blocking_cleanup(
+                force_close_stream,
+                timeout_seconds=SENDER_FORCE_RELEASE_WAIT_SECONDS,
+                label="STT audio stream force close",
+            )
+
+            # Дадим шанс потоку выйти после принудительного закрытия.
+            sender_stopped = await self._wait_for_thread(sender_thread, timeout_seconds=0.6)
+            if not sender_stopped:
+                cleanup_unhealthy = True
 
         # Небольшая пауза, чтобы получить последние transcript-события перед finish().
         await asyncio.sleep(DEEPGRAM_FINALIZATION_GRACE_SECONDS)
 
         # Завершаем соединение и ждем последние результаты
-        await self._run_blocking_cleanup(
+        finish_ok = await self._run_blocking_cleanup(
             dg_connection.finish,
             timeout_seconds=DEEPGRAM_FINISH_TIMEOUT_SECONDS,
             label="Deepgram finish",
         )
+        if not finish_ok:
+            cleanup_unhealthy = True
 
         final_text = self.transcript.strip()
         if not final_text:
             final_text = latest_interim.strip()
         self.transcript = final_text
+        if cleanup_unhealthy:
+            # Если текст уже получен, не теряем команду пользователя.
+            # Но сбрасываем клиента, чтобы следующая STT-сессия стартовала на чистом соединении.
+            self.dg_client = None
+            if final_text:
+                return final_text
+            raise RuntimeError("Deepgram session cleanup timed out")
         return final_text
 
     def listen(
@@ -622,10 +685,20 @@ class SpeechRecognizer:
                 # Закрываем соединение, если оно было создано
                 if dg_connection:
                     try:
-                        dg_connection.finish()
+                        self._run_blocking_cleanup_sync(
+                            dg_connection.finish,
+                            timeout_seconds=DEEPGRAM_FINISH_TIMEOUT_SECONDS,
+                            label="Deepgram finish (error cleanup)",
+                        )
                     except:
                         pass  # Игнорируем ошибки при завершении
 
+                # Принудительно сбрасываем клиента, чтобы след. попытка не унаследовала
+                # подвисшее соединение SDK.
+                self.dg_client = None
+                with contextlib.suppress(Exception):
+                    self.initialize()
+
             if attempt < 2:  # Не ждем после последней попытки
                 print(f"⚠️ Не удалось подключиться к Deepgram, попытка {attempt + 1}/3, повторяю...")
                 time.sleep(1)  # Уменьшаем задержку между попытками
diff --git a/app/core/cleaner.py b/app/core/cleaner.py
index b276814..4a7034c 100644
--- a/app/core/cleaner.py
+++ b/app/core/cleaner.py
@@ -147,6 +147,73 @@ def numbers_to_words(text: str) -> str:
 
     preps_list = "|".join(map(re.escape, PREPOSITION_CASES.keys()))
 
+    # Times such as "в 7:00" / "во 7:00" / "к 7:05" / "07:00" -> natural spoken Russian form.
+    # Important: "в семь" (not "в семи"), "к семи" (dative).
+    def _minute_words(minute_val: int) -> str:  # Spoken form of the minutes part of a clock time.
+        if minute_val == 0:
+            return "ровно"  # ":00" is spoken as "ровно" instead of a number
+        if minute_val < 10:  # single-digit minutes get a leading "ноль " before the converted number
+            return "ноль " + convert_number(
+                str(minute_val), context_type="cardinal", case="nominative", gender="m"
+            )
+        return convert_number(str(minute_val), context_type="cardinal", case="nominative", gender="m")
+
+    def replace_time_match(match):  # re.sub callback; groups are (preposition, hour, minute).
+        prep = match.group(1) or ""
+        hour_str = match.group(2)
+        minute_str = match.group(3)
+
+        try:
+            hour_val = int(hour_str)
+            minute_val = int(minute_str)
+        except Exception:
+            return match.group(0)  # not parseable as integers: leave the matched text unchanged
+
+        if not (0 <= hour_val <= 23 and 0 <= minute_val <= 59):
+            return match.group(0)  # outside 00:00-23:59: leave the matched text unchanged
+
+        prep_clean = prep.strip().lower()  # normalised only for the comparison below; prep itself is emitted as written
+        if prep_clean in {"в", "во"}:
+            hour_case = "accusative"  # "в семь", not "в семи"
+        elif prep_clean in {"к", "ко"}:
+            hour_case = "dative"  # "к семи"
+        else:
+            hour_case = "nominative"
+
+        hour_words = convert_number(str(hour_val), context_type="cardinal", case=hour_case, gender="m")
+        minute_words = _minute_words(minute_val)  # minutes are not declined by the preposition
+
+        prefix = f"{prep} " if prep else ""  # preposition followed by exactly one space
+        return f"{prefix}{hour_words} {minute_words}"
+
+    def replace_time_no_prep_match(match):  # re.sub callback for a bare "H:MM"; groups are (hour, minute).
+        hour_str = match.group(1)
+        minute_str = match.group(2)
+
+        try:
+            hour_val = int(hour_str)
+            minute_val = int(minute_str)
+        except Exception:
+            return match.group(0)  # not parseable as integers: leave the matched text unchanged
+
+        if not (0 <= hour_val <= 23 and 0 <= minute_val <= 59):
+            return match.group(0)  # outside 00:00-23:59: leave the matched text unchanged
+
+        hour_words = convert_number(str(hour_val), context_type="cardinal", case="nominative", gender="m")  # no preposition, so nominative
+        minute_words = _minute_words(minute_val)
+        return f"{hour_words} {minute_words}"
+
+    text = re.sub(
+        r"(?i)\b(в|во|к|ко)\s+(\d{1,2})\s*:\s*(\d{2})\b",
+        replace_time_match,
+        text,
+    )
+    text = re.sub(
+        r"\b(\d{1,2})\s*:\s*(\d{2})\b",
+        replace_time_no_prep_match,
+        text,
+    )
+
     # Года с суффиксом
     def replace_year_suffix_match(match):
         prep = match.group(1)
diff --git a/app/main.py b/app/main.py
index edde6c8..f642005 100644
--- a/app/main.py
+++ b/app/main.py
@@ -335,6 +335,7 @@ def main():
 
                 if not user_text:
                     # Молчание — возвращаемся к ожиданию
+                    print("user was not talking")
                     skip_wakeword = False
                     continue