fix: improve streaming, alarms, and AI TTS

2026-03-12 12:38:35 +03:00
parent 167ddc9264
commit 6769486e83
3 changed files with 171 additions and 81 deletions
--- a/app/core/ai.py
+++ b/app/core/ai.py
@@ -283,11 +283,7 @@ def _extract_response_content(cfg, data: dict) -> str:


 def _iter_openai_compatible_stream(response):
-    for line in response.iter_lines(decode_unicode=True):
-        if not line or not line.startswith("data:"):
-            continue
-
-        data_str = line[5:].strip()
+    for data_str in _iter_sse_data_lines(response):
        if data_str == "[DONE]":
            break

@@ -317,11 +313,7 @@ def _iter_openai_compatible_stream(response):


 def _iter_anthropic_stream(response):
-    for line in response.iter_lines(decode_unicode=True):
-        if not line or not line.startswith("data:"):
-            continue
-
-        data_str = line[5:].strip()
+    for data_str in _iter_sse_data_lines(response):
        if data_str == "[DONE]":
            break

@@ -344,6 +336,28 @@ def _iter_anthropic_stream(response):
                yield str(text)


+def _iter_sse_data_lines(response):
+    """Yield the non-empty payload of each ``data:`` line of an SSE stream.
+
+    Lines are requested from ``response.iter_lines`` without decoding and
+    are decoded here explicitly as UTF-8 (undecodable bytes are replaced),
+    which avoids mojibake such as "ÐÑ..." in the streamed text.  Blank
+    lines, lines that do not start with ``data:`` and ``data:`` lines whose
+    payload is empty after stripping whitespace are skipped.
+    """
+    prefix = "data:"
+    for chunk in response.iter_lines(decode_unicode=False):
+        if not chunk:
+            continue
+
+        text = (
+            chunk.decode("utf-8", errors="replace")
+            if isinstance(chunk, bytes)
+            else str(chunk)
+        )
+        if text.startswith(prefix):
+            payload = text[len(prefix):].strip()
+            if payload:
+                yield payload
+
+
 def _iter_stream_chunks(cfg, response):
    if cfg["protocol"] == "anthropic":
        yield from _iter_anthropic_stream(response)
--- a/app/features/alarm.py
+++ b/app/features/alarm.py
@@ -18,6 +18,121 @@ ALARM_FILE = BASE_DIR / "data" / "alarms.json"
 ALARM_SOUND = BASE_DIR / "assets" / "sounds" / "Apex-1.mp3"
 ASK_ALARM_TIME_PROMPT = "На какое время мне поставить будильник?"

+# Russian number words for 0-9, including the feminine forms "одна"/"две".
+_NUMBER_UNITS = {
+    "ноль": 0,
+    "один": 1,
+    "одна": 1,
+    "два": 2,
+    "две": 2,
+    "три": 3,
+    "четыре": 4,
+    "пять": 5,
+    "шесть": 6,
+    "семь": 7,
+    "восемь": 8,
+    "девять": 9,
+}
+# Single-word numbers 10-19.
+_NUMBER_TEENS = {
+    "десять": 10,
+    "одиннадцать": 11,
+    "двенадцать": 12,
+    "тринадцать": 13,
+    "четырнадцать": 14,
+    "пятнадцать": 15,
+    "шестнадцать": 16,
+    "семнадцать": 17,
+    "восемнадцать": 18,
+    "девятнадцать": 19,
+}
+# Round tens 20-50; a units word may follow (e.g. "двадцать пять").
+_NUMBER_TENS = {
+    "двадцать": 20,
+    "тридцать": 30,
+    "сорок": 40,
+    "пятьдесят": 50,
+}
+# Words that qualify an hour: "of the morning / afternoon / evening / night".
+_PARTS_OF_DAY = {"утра", "дня", "вечера", "ночи"}
+# Words skipped between a marker word and the hour when scanning for a time.
+_FILLER_WORDS = {"мне", "меня", "пожалуйста", "на", "в", "во", "к", "и"}
+# Optional unit word after the hour number ("hour" in its grammatical forms).
+_HOUR_WORDS = {"час", "часа", "часов"}
+# Optional unit word after the minute number ("minute" in its forms).
+_MINUTE_WORDS = {"минута", "минуту", "минуты", "минут"}
+
+
+def _parse_number_tokens(
+    tokens: list[str], start_index: int
+) -> tuple[int | None, int]:
+    """Parse one number starting at ``tokens[start_index]``.
+
+    Recognised forms are a run of decimal digits, a word for 10-19, a word
+    for a round ten optionally followed by a units word ("двадцать пять"),
+    and a single units word.
+
+    Args:
+        tokens: Lower-cased word tokens.
+        start_index: Position of the first token to examine.
+
+    Returns:
+        ``(value, consumed)`` where ``consumed`` is the number of tokens the
+        number occupies, or ``(None, 0)`` when ``start_index`` is past the
+        end of ``tokens`` or the token there is not a number.
+    """
+    if start_index >= len(tokens):
+        return None, 0
+
+    token = tokens[start_index]
+    # isdecimal() rather than isdigit(): isdigit() is also true for
+    # characters such as "²", which int() rejects with ValueError.
+    if token.isdecimal():
+        return int(token), 1
+
+    if token in _NUMBER_TEENS:
+        return _NUMBER_TEENS[token], 1
+
+    if token in _NUMBER_TENS:
+        value = _NUMBER_TENS[token]
+        if start_index + 1 < len(tokens):
+            next_token = tokens[start_index + 1]
+            if next_token in _NUMBER_UNITS:
+                # Compound number: tens word plus units word.
+                return value + _NUMBER_UNITS[next_token], 2
+        return value, 1
+
+    if token in _NUMBER_UNITS:
+        return _NUMBER_UNITS[token], 1
+
+    return None, 0
+
+
+def _apply_part_of_day(hour: int, part_of_day: str | None) -> int:
+    """Convert an hour qualified by a part-of-day word to 24-hour form.
+
+    Args:
+        hour: Hour as spoken.
+        part_of_day: One of "утра", "дня", "вечера", "ночи", or ``None`` /
+            empty when no qualifier was given.
+
+    Returns:
+        The adjusted hour.  Without a qualifier, or with an unrecognised
+        one, ``hour`` is returned unchanged.
+    """
+    if not part_of_day:
+        return hour
+
+    if part_of_day in {"утра", "ночи"}:
+        if hour == 12:
+            # "12 ночи" (and "12 утра") is midnight.
+            return 0
+        if part_of_day == "ночи" and hour in (10, 11):
+            # "11 ночи" means 23:00; leaving it at 11 would set the alarm
+            # for the morning.
+            return hour + 12
+        return hour
+
+    if part_of_day in {"дня", "вечера"} and hour < 12:
+        return hour + 12
+    return hour
+
+
+def _extract_alarm_time_words(text: str) -> tuple[int, int] | None:
+    """Find an alarm time written with words or bare numbers in ``text``.
+
+    The text is lower-cased, "ё" is folded to "е", and it is split into
+    runs of Latin/Cyrillic letters and digits.  After every marker word the
+    scan skips filler words and then expects, in order: an hour, an
+    optional hour word, optional minutes with an optional minute word, and
+    an optional part-of-day word.  The first candidate whose spoken hour is
+    0-23 and whose minute is 0-59 is returned.
+
+    Returns:
+        ``(hour, minute)`` with the hour adjusted by ``_apply_part_of_day``,
+        or ``None`` when no marker is followed by a valid time.
+    """
+    tokens = re.findall(r"[a-zа-я0-9]+", text.lower().replace("ё", "е"))
+    markers = {"будильник", "разбуди", "поставь", "установи", "включи", "на", "в", "к"}
+
+    for index, token in enumerate(tokens):
+        if token not in markers:
+            continue
+
+        current = index + 1
+        while current < len(tokens) and tokens[current] in _FILLER_WORDS:
+            current += 1
+
+        hour, consumed = _parse_number_tokens(tokens, current)
+        if hour is None:
+            continue
+        current += consumed
+
+        if current < len(tokens) and tokens[current] in _HOUR_WORDS:
+            current += 1
+
+        minute = 0
+        if current < len(tokens) and tokens[current] not in _PARTS_OF_DAY:
+            # Minutes below ten are spoken with a leading zero ("семь ноль
+            # пять" is 7:05).  Step over that zero so the digit after it is
+            # read as the minutes instead of being dropped.
+            if (
+                tokens[current] == "ноль"
+                and current + 1 < len(tokens)
+                and tokens[current + 1] in _NUMBER_UNITS
+            ):
+                current += 1
+
+            parsed_minute, minute_consumed = _parse_number_tokens(tokens, current)
+            if parsed_minute is not None:
+                minute = parsed_minute
+                current += minute_consumed
+                if current < len(tokens) and tokens[current] in _MINUTE_WORDS:
+                    current += 1
+
+        part_of_day = None
+        if current < len(tokens) and tokens[current] in _PARTS_OF_DAY:
+            part_of_day = tokens[current]
+
+        # Validate the spoken values; the part-of-day shift is applied only
+        # to a candidate that is already in range.
+        if 0 <= hour <= 23 and 0 <= minute <= 59:
+            return _apply_part_of_day(hour, part_of_day), minute
+
+    return None
+

 class AlarmClock:
    def __init__(self):
@@ -70,10 +185,10 @@ class AlarmClock:
        if re.search(r"\b(каждый день|ежедневно)\b", text):
            return [0, 1, 2, 3, 4, 5, 6]

-        if re.search(r"\b(по будн|в будн|будние)\b", text):
+        if re.search(r"\b(?:по\s+будн\w*|в\s+будн\w*|будн\w*)\b", text):
            days.update([0, 1, 2, 3, 4])

-        if re.search(r"\b(по выходн|в выходн|выходные)\b", text):
+        if re.search(r"\b(?:по\s+выходн\w*|в\s+выходн\w*|выходн\w*)\b", text):
            days.update([5, 6])

        day_patterns = {
@@ -268,32 +383,32 @@ class AlarmClock:

        days = self._extract_alarm_days(text)

-        # Поиск формата "7:30", "7.30"
-        match = re.search(r"\b(\d{1,2})[:.-](\d{2})\b", text)
+        # Поиск формата "7:30", "7.30" и вариантов с "в/на/к".
+        match = re.search(r"(?:\b(?:на|в|во|к)\s+)?(\d{1,2})[:.-](\d{2})\b", text)
        if match:
            h, m = int(match.group(1)), int(match.group(2))
+            period_match = re.search(
+                r"\b(?:на|в|во|к)?\s*" + re.escape(match.group(0).strip()) + r"\s+(утра|дня|вечера|ночи)\b",
+                text,
+            )
+            part_of_day = period_match.group(1) if period_match else None
+            h = _apply_part_of_day(h, part_of_day)
            if 0 <= h <= 23 and 0 <= m <= 59:
                self.add_alarm_with_days(h, m, days=days)
                days_phrase = self._format_days_phrase(days)
                suffix = f" {days_phrase}" if days_phrase else ""
                return f"Я установил будильник на {h} часов {m} минут{suffix}."

-        # Поиск формата словами "на 7 часов 15 минут"
+        # Поиск формата цифрами: "в 7 утра", "на 7", "к 6 30"
        match_time = re.search(
-            r"на\s+(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?",
+            r"(?:\b(?:на|в|во|к)\s+)?(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?(?:\s+(утра|дня|вечера|ночи))?\b",
            text,
        )

        if match_time:
            h = int(match_time.group(1))
            m = int(match_time.group(2)) if match_time.group(2) else 0
-
-            # Умная коррекция времени (если говорят "в 8", а сейчас 9, то это скорее 8 вечера или 8 утра завтра)
-            # Здесь простая логика AM/PM
-            if "вечера" in text and h < 12:
-                h += 12
-            elif "утра" in text and h == 12:
-                h = 0
+            h = _apply_part_of_day(h, match_time.group(3))

            if 0 <= h <= 23 and 0 <= m <= 59:
                self.add_alarm_with_days(h, m, days=days)
@@ -301,6 +416,15 @@ class AlarmClock:
                suffix = f" {days_phrase}" if days_phrase else ""
                return f"Хорошо, разбужу вас в {h}:{m:02d}{suffix}."

+        # Поиск формата словами: "в семь утра", "будильник семь тридцать"
+        word_time = _extract_alarm_time_words(text)
+        if word_time:
+            h, m = word_time
+            self.add_alarm_with_days(h, m, days=days)
+            days_phrase = self._format_days_phrase(days)
+            suffix = f" {days_phrase}" if days_phrase else ""
+            return f"Хорошо, разбужу вас в {h}:{m:02d}{suffix}."
+
        if re.search(r"(постав|установ|запусти|включи|разбуди)", text) or text.strip() in {
            "будильник",
            "поставь будильник",
--- a/app/main.py
+++ b/app/main.py
@@ -2,14 +2,9 @@
 Smart Speaker - Main Application
 """

-import os
-
-import os
-import queue
 import re
 import signal
 import sys
-import threading
 import time
 from collections import deque

@@ -566,38 +561,8 @@ def main():
            # AI запрос
            chat_history.append({"role": "user", "content": user_text})

-            # Очередь для TTS
-            tts_q = queue.Queue()
-            interrupt_event = threading.Event()
-
-            def tts_worker():
-                """Фоновый поток для озвучки."""
-                while True:
-                    item = tts_q.get()
-                    if item is None:
-                        tts_q.task_done()
-                        break
-
-                    text, lang = item
-
-                    if interrupt_event.is_set():
-                        tts_q.task_done()
-                        continue
-
-                    completed = speak(
-                        text, check_interrupt=check_wakeword_once, language=lang
-                    )
-
-                    if not completed:
-                        interrupt_event.set()
-
-                    tts_q.task_done()
-
-            worker_thread = threading.Thread(target=tts_worker, daemon=True)
-            worker_thread.start()
-
            full_response = ""
-            buffer = ""
+            interrupted = False

            try:
                # Streaming от AI
@@ -606,33 +571,20 @@ def main():
                print("🤖 AI: ", end="", flush=True)

                for chunk in stream_generator:
-                    if interrupt_event.is_set():
-                        break
-
-                    buffer += chunk
                    full_response += chunk
                    print(chunk, end="", flush=True)

-                    # Конец предложения
-                    if re.search(r"[.!?\n]+(?:\s|$)", buffer):
-                        clean_chunk = clean_response(buffer, language="ru")
-                        if clean_chunk.strip():
-                            tts_q.put((clean_chunk, "ru"))
-                        buffer = ""
-
-                # Остаток
-                if buffer.strip() and not interrupt_event.is_set():
-                    clean_chunk = clean_response(buffer, language="ru")
-                    if clean_chunk.strip():
-                        tts_q.put((clean_chunk, "ru"))
-
            except Exception as e:
                print(f"\n❌ Ошибка: {e}")
                speak("Произошла ошибка при получении ответа.")
-
-            # Ждем окончания озвучки
-            tts_q.put(None)
-            worker_thread.join()
+            else:
+                clean_ai_response = clean_response(full_response, language="ru")
+                if clean_ai_response.strip():
+                    interrupted = not speak(
+                        clean_ai_response,
+                        check_interrupt=check_wakeword_once,
+                        language="ru",
+                    )

            print()

@@ -643,7 +595,7 @@ def main():
            stop_wakeword_monitoring()
            skip_wakeword = True

-            if interrupt_event.is_set():
+            if interrupted:
                print("⏹️ Ответ прерван")

            print()