# smart-speaker/app/main.py

"""
Smart Speaker - Main Application
"""

import re
import signal
import sys
import time
from collections import deque

# Optional dependency: pygame mixer, used to play short sound effects
try:
    from pygame import mixer
except Exception as exc:
    mixer = None
    _MIXER_IMPORT_ERROR = exc
else:
    _MIXER_IMPORT_ERROR = None

# Наши модули
from .audio.sound_level import parse_volume_text, set_volume
from .audio.stt import cleanup as cleanup_stt
from .audio.stt import get_recognizer, listen
from .audio.tts import initialize as init_tts
from .audio.tts import speak
from .audio.wakeword import (
    check_wakeword_once,
    wait_for_wakeword,
)
from .audio.wakeword import (
    cleanup as cleanup_wakeword,
)
from .audio.wakeword import (
    stop_monitoring as stop_wakeword_monitoring,
)
from .core.ai import ask_ai_stream, interpret_assistant_intent, translate_text
from .core.config import BASE_DIR, WAKE_WORD
from .core.cleaner import clean_response
from .core.commands import is_stop_command
from .core.smalltalk import get_smalltalk_response
from .features.alarm import ASK_ALARM_TIME_PROMPT, get_alarm_clock
from .features.stopwatch import get_stopwatch_manager
from .features.timer import ASK_TIMER_TIME_PROMPT, get_timer_manager
from .features.weather import get_weather_report
from .features.music import get_music_controller
from .features.cities_game import get_cities_game

# Spoken prefixes that introduce a translation request.
# Each entry is (lowercase command prefix, source language code, target language code).
_TRANSLATION_COMMANDS = [
    ("переведи на английский с русского", "ru", "en"),
    ("переведи на русский с английского", "en", "ru"),
    ("переведи на английский язык с русского", "ru", "en"),
    ("переведи на русский язык с английского", "en", "ru"),
    ("переведи с русского на английский", "ru", "en"),
    ("переведи с русского в английский", "ru", "en"),
    ("переведи с английского на русский", "en", "ru"),
    ("переведи с английского в русский", "en", "ru"),
    ("переведи с русского языка", "ru", "en"),
    ("переведи с английского языка", "en", "ru"),
    ("переведи на английский язык", "ru", "en"),
    ("переведи на русский язык", "en", "ru"),
    ("переведи на английский", "ru", "en"),
    ("переведи на русский", "en", "ru"),
    ("переведи с английского", "en", "ru"),
    ("переведи с русского", "ru", "en"),
    ("как по-английски", "ru", "en"),
    ("как по английски", "ru", "en"),
    ("как по-русски", "en", "ru"),
    ("как по русски", "en", "ru"),
    ("translate to english from russian", "ru", "en"),
    ("translate to russian from english", "en", "ru"),
    ("translate from russian to english", "ru", "en"),
    ("translate from english to russian", "en", "ru"),
    ("translate into english", "ru", "en"),
    ("translate into russian", "en", "ru"),
    ("translate to english", "ru", "en"),
    ("translate to russian", "en", "ru"),
    ("translate from english", "en", "ru"),
    ("translate from russian", "ru", "en"),
]
# The same entries ordered longest prefix first, so that a first-match prefix
# scan picks the most specific command before any shorter one it also starts with.
_TRANSLATION_COMMANDS_SORTED = sorted(
    _TRANSLATION_COMMANDS, key=lambda item: len(item[0]), reverse=True
)

# Whole utterances (compared after lowercasing and stripping) that ask the
# assistant to repeat its last answer, with and without the assistant's name.
_REPEAT_PHRASES = {
    "еще раз",
    "повтори",
    "скажи еще раз",
    "что ты сказал",
    "повтори пожалуйста",
    "waltron еще раз",
    "еще раз waltron",
    "waltron повтори",
    "повтори waltron",
    "волтрон еще раз",
    "еще раз волтрон",
    "волтрон повтори",
    "повтори волтрон",
}

# Substrings whose presence in the lowercased command marks it as a weather
# request (checked with the `in` operator, so shorter entries also cover
# longer phrases that contain them).
_WEATHER_TRIGGERS = (
    "погода",
    "погоду",
    "что на улице",
    "какая температура",
    "сколько градусов",
    "холодно ли",
    "жарко ли",
    "нужен ли зонт",
    "брать ли зонт",
    "прогноз погоды",
    "че там на улице",
    "что там на улице",
    "как на улице",
    "как на улице-то",
)

# Words that show a regex-captured candidate is not a city name: demonstratives
# and adverbs ("this", "here", "there"), forms of "all", and generic locality
# nouns ("city", "village", "region", ...).
_CITY_INVALID_WORDS = {
    "этом",
    "том",
    "той",
    "тут",
    "здесь",
    "там",
    "всё",
    "все",
    "всей",
    "всего",
    "всем",
    "всеми",
    "городе",
    "город",
    "село",
    "деревня",
    "посёлок",
    "аул",
    "станция",
    "область",
    "район",
    "край",
    "республика",
}

# Regexes that capture a city name (group 1) from a weather request. They are
# tried in order and the first acceptable capture wins. The captured part is
# one or more words made of Cyrillic/Latin letters joined by spaces or hyphens.
#
# The patterns that begin with the preposition "в" are anchored with \b so the
# last letter of a preceding word (e.g. "киев сегодня") is not mistaken for the
# preposition. The remaining patterns already require whitespace before "в".
_CITY_PATTERNS = [
    # "в городе <city>"
    re.compile(
        r"\bв\s+городе\s+([а-яёa-z]+[-\s]*[а-яёa-z]*(?:[-\s]+[а-яёa-z]+)*)",
        re.IGNORECASE,
    ),
    # "в <city>"
    re.compile(
        r"\bв\s+([а-яёa-z]+[-\s]*[а-яёa-z]*(?:[-\s]+[а-яёa-z]+)*)",
        re.IGNORECASE,
    ),
    # "погода в <city>"
    re.compile(
        r"погода\s+в\s+([а-яёa-z]+[-\s]*[а-яёa-z]*(?:[-\s]+[а-яёa-z]+)*)",
        re.IGNORECASE,
    ),
    # "погода <city> какая|сейчас|там"
    re.compile(
        r"погода\s+([а-яёa-z]+[-\s]*[а-яёa-z]*(?:[-\s]+[а-яёa-z]+)*)\s+(?:какая|сейчас|там)",
        re.IGNORECASE,
    ),
    # "какая|как погода в <city>"
    re.compile(
        r"(?:какая|как)\s+погода\s+в\s+([а-яёa-z]+[-\s]*[а-яёa-z]*(?:[-\s]+[а-яёa-z]+)*)",
        re.IGNORECASE,
    ),
]

# Minimum confidence values for acting on the semantic intent classifier:
# - INTENT: replace the recognized text with the classifier's normalized command;
# - MUSIC: dispatch a "music" intent straight to the music controller;
# - REPEAT_STOP: treat the utterance as a "stop" or "repeat" request.
_SEMANTIC_INTENT_MIN_CONFIDENCE = 0.55
_SEMANTIC_MUSIC_MIN_CONFIDENCE = 0.45
_SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE = 0.72


def signal_handler(sig, frame):
    """Handle Ctrl+C (SIGINT): release audio resources and exit.

    Args:
        sig: Signal number passed by the ``signal`` module (unused).
        frame: Current stack frame passed by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit code 0, after cleanup was attempted.
    """
    print("\n\n👋 Завершение работы...")
    # Each cleanup step is best-effort: a failure in one must not prevent the
    # other from running or the process from exiting.
    try:
        cleanup_wakeword()  # stop wake-word detection (Porcupine)
    except Exception as e:
        print(f"Ошибка при остановке wakeword: {e}")
    try:
        cleanup_stt()  # stop speech-to-text (Deepgram)
    except Exception as e:
        print(f"Ошибка при остановке STT: {e}")
    sys.exit(0)


def parse_translation_request(text: str):
    """Check whether a phrase is a translation request and split it up.

    The phrase is matched case-insensitively against the known command
    prefixes, longest first, so that e.g. "переведи с русского на английский"
    is not caught by the shorter "переведи с русского".

    Args:
        text: Recognized user phrase.

    Returns:
        ``None`` when the phrase does not start with a translation command.
        Otherwise a dict with keys ``"source_lang"``, ``"target_lang"`` and
        ``"text"`` (the remainder to translate, possibly an empty string).
    """
    # Strip first so the prefix length measured on the lowercase copy lines up
    # with the text that is sliced below.
    stripped = text.strip()
    lowered = stripped.lower()
    for prefix, source_lang, target_lang in _TRANSLATION_COMMANDS_SORTED:
        if lowered.startswith(prefix):
            # Cut the command off, then drop separators such as ":" or a dash.
            rest = stripped[len(prefix) :].strip().lstrip(" :—-")
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": rest,
            }
    return None


def _reinitialize_stt():
    """Recreate the STT session after a failed ``listen()`` call.

    A failure here is only reported on stdout so the main loop keeps running.
    """
    print("Переинициализация STT...")
    try:
        cleanup_stt()
        get_recognizer().initialize()
    except Exception as init_error:
        print(f"Ошибка переинициализации STT: {init_error}")


def _handle_stop(stopwatch_manager, last_response):
    """Run the "stop" action and return the text to keep as the last response.

    Notifies the music controller of the stop word, then pauses running
    stopwatches if there are any and speaks the result. When nothing is
    spoken, ``last_response`` is returned unchanged.
    """
    music_stop_response = get_music_controller().pause_for_stop_word()
    if music_stop_response:
        print(f"🎵 {music_stop_response}")

    if stopwatch_manager.has_running_stopwatches():
        spoken = clean_response(stopwatch_manager.pause_stopwatches(), language="ru")
        speak(spoken)
        return spoken

    print("_" * 50)
    print(f"💤 Жду '{WAKE_WORD}'...")
    return last_response


def _repeat_last_response(last_response):
    """Speak the previous answer again, or say that there is none yet."""
    if last_response:
        print(f"🔁 Повторяю: {last_response}")
        speak(last_response)
    else:
        speak("Я еще ничего не говорил.")


def _extract_requested_city(text_lower):
    """Return the title-cased city named in a weather request, or ``None``.

    Patterns are tried in order. A capture is rejected when it is a single
    character or when any of its words is listed in ``_CITY_INVALID_WORDS``.
    Whole words are compared on purpose: a substring test would reject real
    cities such as "томск" (contains "том") or "барнаул" (contains "аул").
    """
    for pattern in _CITY_PATTERNS:
        match = pattern.search(text_lower)
        if not match:
            continue
        candidate = match.group(1).strip()
        if len(candidate) <= 1:
            continue
        if _CITY_INVALID_WORDS.isdisjoint(re.split(r"[-\s]+", candidate)):
            return candidate.title()
    return None


def main():
    """Run the smart speaker: initialize subsystems, then loop forever.

    Each loop pass checks timers and alarms, waits for the wake word (or, in
    follow-up mode, listens straight away), and routes the recognized text
    through the handlers in this order: stop, repeat, semantic intent,
    small talk, stopwatch, timer, alarm, volume, weather, music, translation,
    cities game, and finally the AI chat backend.

    The function does not return; the process ends through ``signal_handler``.
    """
    print("=" * 50)
    print("🔊 УМНАЯ КОЛОНКА")
    print("=" * 50)
    print(f"Скажите '{WAKE_WORD}' для активации")
    print("Нажмите Ctrl+C для выхода")
    print("=" * 50)
    print()

    # Install the Ctrl+C handler.
    signal.signal(signal.SIGINT, signal_handler)

    print("⏳ Инициализация моделей...")

    # Sound effects are optional: every failure below only disables them.
    ding_sound = None
    if mixer is None:
        print(
            "Warning: pygame mixer not available; sound effects disabled."
            f" ({_MIXER_IMPORT_ERROR})"
        )
    else:
        try:
            mixer.init()
        except Exception as exc:
            print(f"Warning: pygame mixer init failed; sound effects disabled. ({exc})")
        else:
            ding_sound_path = BASE_DIR / "assets" / "sounds" / "ding.wav"
            if ding_sound_path.exists():
                try:
                    ding_sound = mixer.Sound(str(ding_sound_path))
                    ding_sound.set_volume(0.3)
                except Exception as exc:
                    # An unreadable file must not abort startup.
                    ding_sound = None
                    print(
                        f"Warning: could not load {ding_sound_path};"
                        f" sound effects disabled. ({exc})"
                    )
            else:
                print(f"⚠️ Звук {ding_sound_path} не найден")

    get_recognizer().initialize()  # speech-to-text
    init_tts()  # text-to-speech
    alarm_clock = get_alarm_clock()
    stopwatch_manager = get_stopwatch_manager()
    timer_manager = get_timer_manager()
    cities_game = get_cities_game()
    print()

    # Conversation sent to the AI backend; only the newest 20 messages are kept.
    chat_history = deque(maxlen=20)

    # Last text spoken by the assistant, used by the "repeat" command.
    last_response = None

    # Follow-up mode: when True the next pass listens without the wake word.
    skip_wakeword = False

    followup_idle_timeout_seconds = 3.7

    # "timer" or "alarm" while the assistant waits for the user to name a time.
    pending_time_target = None

    # Monotonic timestamp of the last STT health check (immune to clock jumps).
    last_stt_check = time.monotonic()

    while True:
        # Periodic STT health check, at most once every 10 minutes.
        if time.monotonic() - last_stt_check > 600:
            try:
                recognizer = get_recognizer()
                if hasattr(recognizer, "check_connection_health"):
                    recognizer.check_connection_health()
            except Exception as e:
                print(f"Ошибка при проверке STT: {e}")
            finally:
                # Refresh even on failure so a broken check is not retried
                # (and logged) on every single loop pass.
                last_stt_check = time.monotonic()
        try:
            # Release the microphone held by wake-word monitoring.
            stop_wakeword_monitoring()

            if timer_manager.check_timers():
                skip_wakeword = False
                continue

            if alarm_clock.check_alarms():
                skip_wakeword = False
                continue

            if not skip_wakeword:
                # The short timeout lets timers and alarms be polled regularly.
                detected = wait_for_wakeword(timeout=0.5)
                if not detected:
                    continue

                # Activation sound.
                if ding_sound is not None:
                    ding_sound.play()

                try:
                    user_text = listen(timeout_seconds=5.0, fast_stop=True)
                except Exception as e:
                    print(f"Ошибка при прослушивании: {e}")
                    _reinitialize_stt()
                    continue
            else:
                # Follow-up mode: listen without the wake word.
                print(f"👂 Слушаю ({followup_idle_timeout_seconds:.1f} сек)...")
                try:
                    user_text = listen(
                        timeout_seconds=7.0,
                        detection_timeout=followup_idle_timeout_seconds,
                        fast_stop=True,
                    )
                except Exception as e:
                    print(f"Ошибка при прослушивании: {e}")
                    _reinitialize_stt()
                    skip_wakeword = False
                    continue

            # Silence or nothing recognized: go back to waiting for the wake word.
            if not user_text:
                skip_wakeword = False
                continue

            # "Stop" command (keyword match).
            if is_stop_command(user_text):
                last_response = _handle_stop(stopwatch_manager, last_response)
                skip_wakeword = False
                continue

            # "Repeat" command; "повтори за мной" is a different request.
            user_text_lower = user_text.lower().strip()
            if user_text_lower in _REPEAT_PHRASES or (
                user_text_lower.startswith("повтори")
                and "за мной" not in user_text_lower
            ):
                _repeat_last_response(last_response)
                skip_wakeword = True
                continue

            # Semantic intent. `or <default>` keeps a None field from turning
            # into the literal text "None" after str().
            effective_text = user_text
            semantic_intent = interpret_assistant_intent(user_text)
            semantic_type = str(semantic_intent.get("intent") or "none").strip().lower()
            try:
                semantic_confidence = float(
                    semantic_intent.get("confidence", 0.0) or 0.0
                )
            except (TypeError, ValueError):
                semantic_confidence = 0.0
            semantic_command = str(
                semantic_intent.get("normalized_command") or ""
            ).strip()
            semantic_music_action = (
                str(semantic_intent.get("music_action") or "none").strip().lower()
            )
            semantic_music_query = str(semantic_intent.get("music_query") or "").strip()

            if (
                semantic_type == "stop"
                and semantic_confidence >= _SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE
            ):
                last_response = _handle_stop(stopwatch_manager, last_response)
                skip_wakeword = False
                continue

            if (
                semantic_type == "repeat"
                and semantic_confidence >= _SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE
            ):
                _repeat_last_response(last_response)
                skip_wakeword = True
                continue

            if (
                semantic_type == "music"
                and semantic_confidence >= _SEMANTIC_MUSIC_MIN_CONFIDENCE
            ):
                music_controller = get_music_controller()
                semantic_music_response = music_controller.handle_semantic_action(
                    semantic_music_action,
                    semantic_music_query,
                )
                # No response: fall through to the regular handlers below.
                if semantic_music_response:
                    clean_music_response = clean_response(
                        semantic_music_response, language="ru"
                    )
                    speak(clean_music_response)
                    last_response = clean_music_response
                    skip_wakeword = True
                    continue

            if (
                semantic_command
                and semantic_confidence >= _SEMANTIC_INTENT_MIN_CONFIDENCE
                and semantic_type
                in {
                    "music",
                    "timer",
                    "alarm",
                    "weather",
                    "volume",
                    "translation",
                    "cities",
                }
            ):
                # Use the normalized wording for the keyword handlers below.
                effective_text = semantic_command
                print(f"🧠 Команда: '{user_text}' -> '{effective_text}'")

            # Small talk.
            smalltalk_response = get_smalltalk_response(effective_text)
            if smalltalk_response:
                clean_smalltalk = clean_response(smalltalk_response, language="ru")
                speak(clean_smalltalk)
                last_response = clean_smalltalk
                skip_wakeword = True
                continue

            # While a time is awaited, prefix the answer with the target's
            # keyword so the matching parser below picks it up.
            command_text = effective_text
            command_text_lower = command_text.lower()
            if pending_time_target == "timer" and "таймер" not in command_text_lower:
                command_text = f"таймер {command_text}"
            elif (
                pending_time_target == "alarm"
                and "будильник" not in command_text_lower
                and "разбуди" not in command_text_lower
            ):
                command_text = f"будильник {command_text}"

            # Stopwatch.
            stopwatch_response = stopwatch_manager.parse_command(command_text)
            if stopwatch_response:
                clean_stopwatch_response = clean_response(
                    stopwatch_response, language="ru"
                )
                speak(clean_stopwatch_response)
                last_response = clean_stopwatch_response
                skip_wakeword = True
                continue

            # Timer.
            timer_response = timer_manager.parse_command(command_text)
            if timer_response:
                clean_timer_response = clean_response(timer_response, language="ru")
                completed = speak(
                    clean_timer_response, check_interrupt=check_wakeword_once
                )
                last_response = clean_timer_response
                asked_for_time = timer_response == ASK_TIMER_TIME_PROMPT
                pending_time_target = "timer" if asked_for_time else None
                # Keep listening when the user interrupted with the wake word
                # or was asked for the duration (same as the alarm prompt).
                skip_wakeword = asked_for_time or not completed
                continue

            # Alarm.
            alarm_response = alarm_clock.parse_command(command_text)
            if alarm_response:
                clean_alarm_response = clean_response(alarm_response, language="ru")
                speak(clean_alarm_response)
                last_response = clean_alarm_response
                pending_time_target = (
                    "alarm" if alarm_response == ASK_ALARM_TIME_PROMPT else None
                )
                skip_wakeword = alarm_response == ASK_ALARM_TIME_PROMPT
                continue

            # Volume.
            if command_text.lower().startswith("громкость"):
                try:
                    vol_str = command_text.lower().replace("громкость", "", 1).strip()
                    level = parse_volume_text(vol_str)

                    if level is not None:
                        if set_volume(level):
                            msg = f"Громкость установлена на {level}"
                            clean_msg = clean_response(msg, language="ru")
                            speak(clean_msg)
                            last_response = clean_msg
                        else:
                            speak("Не удалось установить громкость.")
                    else:
                        speak(
                            "Я не понял число громкости. Скажите число от одного до десяти."
                        )

                    skip_wakeword = True
                    continue
                except Exception as e:
                    print(f"❌ Ошибка громкости: {e}")
                    speak("Не удалось изменить громкость.")
                    skip_wakeword = True
                    continue

            # Weather. The city is only looked up for actual weather requests.
            command_lower = command_text.lower()
            if any(trigger in command_lower for trigger in _WEATHER_TRIGGERS):
                requested_city = _extract_requested_city(command_lower)
                weather_report = get_weather_report(requested_city)
                clean_report = clean_response(weather_report, language="ru")
                speak(clean_report)
                last_response = clean_report
                skip_wakeword = True
                continue

            # Music.
            music_controller = get_music_controller()
            music_response = music_controller.parse_command(command_text)
            if music_response:
                clean_music_response = clean_response(music_response, language="ru")
                speak(clean_music_response)
                last_response = clean_music_response
                skip_wakeword = True
                continue

            # Translation.
            translation_request = parse_translation_request(command_text)
            if translation_request:
                source_lang = translation_request["source_lang"]
                target_lang = translation_request["target_lang"]
                text_to_translate = translation_request["text"]

                # Only the command was said: ask for the phrase itself.
                if not text_to_translate:
                    prompt = (
                        "Скажи фразу на английском."
                        if source_lang == "en"
                        else "Скажи фразу на русском."
                    )
                    speak(prompt)
                    try:
                        text_to_translate = listen(
                            timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
                        )
                    except Exception as e:
                        print(f"Ошибка при прослушивании для перевода: {e}")
                        _reinitialize_stt()
                        speak("Произошла ошибка при распознавании речи.")
                        skip_wakeword = False
                        continue

                if not text_to_translate:
                    speak("Я не расслышал текст для перевода.")
                    skip_wakeword = False
                    continue

                translated_text = translate_text(
                    text_to_translate, source_lang, target_lang
                )
                clean_text = clean_response(translated_text, language=target_lang)

                last_response = clean_text

                completed = speak(
                    clean_text,
                    check_interrupt=check_wakeword_once,
                    language=target_lang,
                )
                stop_wakeword_monitoring()
                skip_wakeword = True

                if not completed:
                    print("⏹️ Перевод прерван")
                continue

            # Cities game.
            cities_response = cities_game.handle(command_text)
            if cities_response:
                clean_cities_response = clean_response(cities_response, language="ru")
                speak(clean_cities_response)
                last_response = clean_cities_response
                skip_wakeword = True
                continue

            # Nothing matched: send the original user text to the AI backend.
            chat_history.append({"role": "user", "content": user_text})

            full_response = ""
            clean_ai_response = ""
            interrupted = False

            try:
                stream_generator = ask_ai_stream(list(chat_history))

                print("🤖 AI: ", end="", flush=True)

                for chunk in stream_generator:
                    full_response += chunk
                    print(chunk, end="", flush=True)

            except Exception as e:
                print(f"\n❌ Ошибка: {e}")
                speak("Произошла ошибка при получении ответа.")
            else:
                clean_ai_response = clean_response(full_response, language="ru")
                if clean_ai_response.strip():
                    interrupted = not speak(
                        clean_ai_response,
                        check_interrupt=check_wakeword_once,
                        language="ru",
                    )

            print()

            if clean_ai_response.strip():
                chat_history.append({"role": "assistant", "content": full_response})
                last_response = clean_ai_response
            else:
                # Failed or empty reply: drop the unanswered user turn instead
                # of storing an empty assistant message, and keep the previous
                # last_response so "repeat" still has something to say.
                chat_history.pop()

            stop_wakeword_monitoring()
            skip_wakeword = True

            if interrupted:
                print("⏹️ Ответ прерван")

            print()
            print("-" * 30)
            print()

        except KeyboardInterrupt:
            signal_handler(None, None)
        except Exception as e:
            print(f"❌ Ошибка: {e}")
            # The error may come from TTS itself; never let the error report
            # take the main loop down.
            try:
                speak("Произошла ошибка. Попробуйте ещё раз.")
            except Exception as speak_error:
                print(f"❌ Ошибка: {speak_error}")
            skip_wakeword = False


if __name__ == "__main__":
    main()