From 6add70fcd2de87b221797e443086692e27e9658c Mon Sep 17 00:00:00 2001 From: future Date: Sun, 15 Mar 2026 16:22:00 +0300 Subject: [PATCH] Fix TTS time phrases and STT cleanup --- .gitignore | 3 ++ app/audio/stt.py | 77 +++++++++++++++++++++++++++++++++++++++++++-- app/core/cleaner.py | 67 +++++++++++++++++++++++++++++++++++++++ app/main.py | 1 + 4 files changed, 146 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 7749cc9..52d117e 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,9 @@ vosk-model-*/ # VS Code .vscode/ +# Runtime state +data/music_state.json + .beads .gitattributes diff --git a/app/audio/stt.py b/app/audio/stt.py index 8ee2e43..1c4483b 100644 --- a/app/audio/stt.py +++ b/app/audio/stt.py @@ -234,6 +234,33 @@ class SpeechRecognizer: return True + def _run_blocking_cleanup_sync(self, func, timeout_seconds: float, label: str) -> bool: + """Sync-версия _run_blocking_cleanup() для use-case в listen().""" + done_event = threading.Event() + error_holder = {} + + def runner(): + try: + func() + except Exception as exc: + error_holder["error"] = exc + finally: + done_event.set() + + thread = threading.Thread(target=runner, daemon=True, name=label) + thread.start() + + done_event.wait(timeout=max(0.0, float(timeout_seconds))) + if not done_event.is_set(): + print(f"⚠️ {label} timed out; continuing cleanup.") + return False + + error = error_holder.get("error") + if error is not None: + print(f"⚠️ {label} failed: {error}") + return False + return True + async def _process_audio( self, dg_connection, timeout_seconds, detection_timeout, fast_stop ): @@ -334,6 +361,7 @@ class SpeechRecognizer: # --- Задача отправки аудио с буферизацией --- sender_stop_event = threading.Event() + stream_holder = {"stream": None} def request_stop(): stop_event.set() @@ -346,6 +374,7 @@ class SpeechRecognizer: try: stream, stream_sample_rate = self._open_stream_for_session() + stream_holder["stream"] = stream options = LiveOptions( model="nova-2", # Самая быстрая и точная модель language=self.current_lang, @@ -465,6 +494,7 @@ class SpeechRecognizer: with contextlib.suppress(Exception): if stream: stream.close() + stream_holder["stream"] = None print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}") sender_thread = threading.Thread( @@ -544,23 +574,56 @@ class SpeechRecognizer: sender_thread, timeout_seconds=max(SENDER_STOP_WAIT_SECONDS, SENDER_FORCE_RELEASE_WAIT_SECONDS), ) + cleanup_unhealthy = False if not sender_stopped: print("⚠️ Audio sender shutdown timed out; continuing cleanup.") + cleanup_unhealthy = True + + def force_close_stream(): + stream = stream_holder.get("stream") + if not stream: + return + with contextlib.suppress(Exception): + if stream.is_active(): + stream.stop_stream() + with contextlib.suppress(Exception): + stream.close() + stream_holder["stream"] = None + + await self._run_blocking_cleanup( + force_close_stream, + timeout_seconds=SENDER_FORCE_RELEASE_WAIT_SECONDS, + label="STT audio stream force close", + ) + + # Дадим шанс потоку выйти после принудительного закрытия. + sender_stopped = await self._wait_for_thread(sender_thread, timeout_seconds=0.6) + if not sender_stopped: + cleanup_unhealthy = True # Небольшая пауза, чтобы получить последние transcript-события перед finish(). await asyncio.sleep(DEEPGRAM_FINALIZATION_GRACE_SECONDS) # Завершаем соединение и ждем последние результаты - await self._run_blocking_cleanup( + finish_ok = await self._run_blocking_cleanup( dg_connection.finish, timeout_seconds=DEEPGRAM_FINISH_TIMEOUT_SECONDS, label="Deepgram finish", ) + if not finish_ok: + cleanup_unhealthy = True final_text = self.transcript.strip() if not final_text: final_text = latest_interim.strip() self.transcript = final_text + if cleanup_unhealthy: + # Если текст уже получен, не теряем команду пользователя. + # Но сбрасываем клиента, чтобы следующая STT-сессия стартовала на чистом соединении. + self.dg_client = None + if final_text: + return final_text + raise RuntimeError("Deepgram session cleanup timed out") return final_text def listen( @@ -622,10 +685,20 @@ class SpeechRecognizer: # Закрываем соединение, если оно было создано if dg_connection: try: - dg_connection.finish() + self._run_blocking_cleanup_sync( + dg_connection.finish, + timeout_seconds=DEEPGRAM_FINISH_TIMEOUT_SECONDS, + label="Deepgram finish (error cleanup)", + ) except: pass # Игнорируем ошибки при завершении + # Принудительно сбрасываем клиента, чтобы след. попытка не унаследовала + # подвисшее соединение SDK. + self.dg_client = None + with contextlib.suppress(Exception): + self.initialize() + if attempt < 2: # Не ждем после последней попытки print(f"⚠️ Не удалось подключиться к Deepgram, попытка {attempt + 1}/3, повторяю...") time.sleep(1) # Уменьшаем задержку между попытками diff --git a/app/core/cleaner.py b/app/core/cleaner.py index b276814..4a7034c 100644 --- a/app/core/cleaner.py +++ b/app/core/cleaner.py @@ -147,6 +147,73 @@ def numbers_to_words(text: str) -> str: preps_list = "|".join(map(re.escape, PREPOSITION_CASES.keys())) + # Время вида "в 7:00" / "во 7:00" / "к 7:05" / "07:00" -> человеческая русская форма. + # Важно: "в семь" (не "в семи"), "к семи" (дательный). + def _minute_words(minute_val: int) -> str: + if minute_val == 0: + return "ровно" + if minute_val < 10: + return "ноль " + convert_number( + str(minute_val), context_type="cardinal", case="nominative", gender="m" + ) + return convert_number(str(minute_val), context_type="cardinal", case="nominative", gender="m") + + def replace_time_match(match): + prep = match.group(1) or "" + hour_str = match.group(2) + minute_str = match.group(3) + + try: + hour_val = int(hour_str) + minute_val = int(minute_str) + except Exception: + return match.group(0) + + if not (0 <= hour_val <= 23 and 0 <= minute_val <= 59): + return match.group(0) + + prep_clean = prep.strip().lower() + if prep_clean in {"в", "во"}: + hour_case = "accusative" + elif prep_clean in {"к", "ко"}: + hour_case = "dative" + else: + hour_case = "nominative" + + hour_words = convert_number(str(hour_val), context_type="cardinal", case=hour_case, gender="m") + minute_words = _minute_words(minute_val) + + prefix = f"{prep} " if prep else "" + return f"{prefix}{hour_words} {minute_words}" + + def replace_time_no_prep_match(match): + hour_str = match.group(1) + minute_str = match.group(2) + + try: + hour_val = int(hour_str) + minute_val = int(minute_str) + except Exception: + return match.group(0) + + if not (0 <= hour_val <= 23 and 0 <= minute_val <= 59): + return match.group(0) + + hour_words = convert_number(str(hour_val), context_type="cardinal", case="nominative", gender="m") + minute_words = _minute_words(minute_val) + return f"{hour_words} {minute_words}" + + text = re.sub( + r"(?i)\b(в|во|к|ко)\s+(\d{1,2})\s*:\s*(\d{2})\b", + replace_time_match, + text, + ) + text = re.sub( + r"\b(\d{1,2})\s*:\s*(\d{2})\b", + replace_time_no_prep_match, + text, + ) + # Года с суффиксом def replace_year_suffix_match(match): prep = match.group(1) diff --git a/app/main.py b/app/main.py index edde6c8..f642005 100644 --- a/app/main.py +++ b/app/main.py @@ -335,6 +335,7 @@ def main(): if not user_text: # Молчание — возвращаемся к ожиданию + print("user was not talking") skip_wakeword = False continue