291 lines
10 KiB
Python
291 lines
10 KiB
Python
"""
Smart Speaker - Main Application

Voice assistant with wake word detection, STT, AI and TTS.

Flow:
1. Wait for wake word ("Alexandr")
2. Listen to user speech (STT)
3. Send query to AI (Perplexity)
4. Clean response from markdown
5. Speak response (TTS)
6. Loop back to step 1
"""
|
||
|
||
import signal
|
||
import sys
|
||
import re
|
||
import threading
|
||
from collections import deque
|
||
|
||
from wakeword import (
|
||
wait_for_wakeword,
|
||
cleanup as cleanup_wakeword,
|
||
check_wakeword_once,
|
||
stop_monitoring as stop_wakeword_monitoring,
|
||
)
|
||
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
||
from ai import ask_ai, translate_text
|
||
from cleaner import clean_response
|
||
from tts import speak, initialize as init_tts
|
||
from sound_level import set_volume, parse_volume_text
|
||
from alarm import get_alarm_clock
|
||
|
||
|
||
def signal_handler(sig, frame):
    """Handle Ctrl+C gracefully.

    Releases the wake word and STT resources and terminates the process with
    exit code 0. Each cleanup step is guarded individually so that a failure
    in one of them can neither skip the other nor prevent the exit.

    Args:
        sig: Signal number passed by the ``signal`` module (unused).
        frame: Current stack frame passed by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, via ``sys.exit(0)``.
    """
    print("\n\n👋 Завершение работы...")
    for release in (cleanup_wakeword, cleanup_stt):
        try:
            release()
        except Exception as exc:
            # An exception escaping here would be raised inside the main loop,
            # get swallowed by its generic handler and leave the app running.
            print(f"❌ Ошибка: {exc}")
    sys.exit(0)
|
||
|
||
|
||
def parse_translation_request(text: str):
    """
    Detect translation commands and extract language direction and text.

    Matching is case-insensitive. Surrounding whitespace of the utterance is
    ignored and the words of a command may be separated by any amount of
    whitespace, so slightly noisy speech-to-text output is still recognized.

    Args:
        text: Recognized user utterance. ``None`` or a blank string is treated
            as "not a translation command".

    Returns:
        dict with source_lang, target_lang, text or None.
        ``text`` is the phrase to translate with surrounding whitespace
        removed; it is an empty string when only the command itself was said.
    """
    if not text:
        return None

    # The patterns are anchored at the start, so leading whitespace must go
    # before matching or no command would ever be recognized.
    utterance = text.strip()

    # Each entry: (command regex, source language, target language).
    commands = (
        (r"переведи\s+на\s+английский", "ru", "en"),
        (r"переведи\s+на\s+русский", "en", "ru"),
        (r"переведи\s+с\s+английского", "en", "ru"),
        (r"переведи\s+с\s+русского", "ru", "en"),
        (r"как\s+по[-\s]*английски", "ru", "en"),
        (r"как\s+по[-\s]*русски", "en", "ru"),
        (r"translate\s+(?:to|into)\s+english", "ru", "en"),
        (r"translate\s+(?:to|into)\s+russian", "en", "ru"),
        (r"translate\s+from\s+english", "en", "ru"),
        (r"translate\s+from\s+russian", "ru", "en"),
    )

    for command, source_lang, target_lang in commands:
        # Everything after the command is the phrase to translate.
        match = re.match(rf"{command}\s*(.*)$", utterance, flags=re.IGNORECASE)
        if match:
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": match.group(1).strip(),
            }
    return None
|
||
|
||
|
||
# Spoken prefix that introduces a volume command (compared in lower case).
_VOLUME_COMMAND = "громкость"

# Utterances (lower case, stripped) that end the dialogue and return to
# waiting for the wake word.
_STOP_COMMANDS = ("стоп", "александр", "стоп александр", "хватит")


def _initialize_models():
    """Initialize the STT and TTS models on parallel threads.

    Raises:
        Exception: The first error recorded by either initializer, re-raised
            after both threads have finished.
    """
    init_errors = []

    def run_guarded(initializer):
        # Exceptions do not propagate out of a thread on their own, so they
        # are collected here and re-raised by the caller after join().
        try:
            initializer()
        except Exception as e:
            init_errors.append(e)

    workers = [
        threading.Thread(
            target=run_guarded,
            args=(lambda: get_recognizer().initialize(),),
            daemon=True,
        ),
        threading.Thread(target=run_guarded, args=(init_tts,), daemon=True),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    if init_errors:
        raise init_errors[0]


def _handle_volume_command(command_text):
    """Apply a spoken volume command and announce the outcome.

    Args:
        command_text: Lower-cased, stripped utterance starting with the
            volume prefix; the remainder is handed to ``parse_volume_text``.
    """
    try:
        # Drop the command prefix; what is left should describe the level.
        vol_str = command_text[len(_VOLUME_COMMAND):].strip()
        level = parse_volume_text(vol_str)

        if level is None:
            speak("Я не понял число громкости. Скажите число от одного до десяти.")
        elif set_volume(level):
            speak(f"Громкость установлена на {level}")
        else:
            speak("Не удалось установить громкость.")
    except Exception as e:
        print(f"❌ Ошибка громкости: {e}")
        speak("Не удалось изменить громкость.")


def _handle_translation(request):
    """Translate the requested phrase and speak the result.

    When the command carried no phrase, the user is prompted and listened to
    once more in the source language.

    Args:
        request: dict produced by ``parse_translation_request`` with the keys
            ``source_lang``, ``target_lang`` and ``text``.

    Returns:
        True when a translation was spoken (the caller should switch to
        follow-up mode), False when no phrase to translate was captured.
    """
    source_lang = request["source_lang"]
    target_lang = request["target_lang"]
    text_to_translate = request["text"]

    if not text_to_translate:
        prompt = (
            "Скажи фразу на английском."
            if source_lang == "en"
            else "Скажи фразу на русском."
        )
        speak(prompt)
        text_to_translate = listen(
            timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
        )

    if not text_to_translate:
        speak("Я не расслышал текст для перевода.")
        return False

    translated_text = translate_text(text_to_translate, source_lang, target_lang)
    clean_text = clean_response(translated_text, language=target_lang)

    completed = speak(
        clean_text,
        check_interrupt=check_wakeword_once,
        language=target_lang,
    )
    # Stop the wake word monitoring that was used for interrupt checks.
    stop_wakeword_monitoring()

    if not completed:
        print("⏹️ Перевод прерван - слушаю следующий вопрос")
    return True


def main():
    """Main application loop.

    Prints the banner, installs the SIGINT handler, initializes the models
    and then loops forever: check alarms, wait for the wake word (or listen
    for a follow-up), dispatch built-in commands (stop, alarm, volume,
    translation) and otherwise forward the utterance to the AI and speak the
    cleaned answer. After a spoken answer the next iteration listens for a
    follow-up without requiring the wake word.

    Raises:
        Exception: Whatever model initialization raised. Errors inside the
            loop are reported and the loop keeps running.
    """
    print("=" * 50)
    print("🔊 УМНАЯ КОЛОНКА")
    print("=" * 50)
    print("Скажите 'Alexandr' для активации")
    print("Нажмите Ctrl+C для выхода")
    print("=" * 50)
    print()

    # Setup signal handler for graceful exit
    signal.signal(signal.SIGINT, signal_handler)

    # Pre-initialize models (takes a few seconds)
    print("⏳ Инициализация моделей...")
    _initialize_models()

    alarm_clock = get_alarm_clock()  # Initialize Alarm Clock
    print()

    # Chat history holds complete exchanges only (10 exchanges = 20 messages)
    chat_history = deque(maxlen=20)

    # True while in follow-up mode: listen right away, no wake word needed.
    skip_wakeword = False
    while True:
        try:
            # Ensure wake word detector stream is closed before listening
            stop_wakeword_monitoring()

            # Check for alarms every loop iteration
            if alarm_clock.check_alarms():
                skip_wakeword = False  # Reset state after alarm
                continue

            # Step 1: Wait for wake word or listen for a follow-up
            if not skip_wakeword:
                # Short timeout so the alarm check above runs regularly
                if not wait_for_wakeword(timeout=1.0):
                    continue

                # Standard listen after activation
                user_text = listen(timeout_seconds=7.0)

                # Step 2: Check if speech was recognized
                if not user_text:
                    speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
                    skip_wakeword = False
                    continue
            else:
                print("👂 Слушаю продолжение диалога (5 сек)...")
                user_text = listen(timeout_seconds=10.0, detection_timeout=5.0)

                if not user_text:
                    # User didn't continue conversation, go back to sleep silently
                    skip_wakeword = False
                    continue

            user_text_lower = user_text.lower().strip()

            # Check for stop commands
            if user_text_lower in _STOP_COMMANDS:
                print("_" * 50)
                print("💤 Жду 'Alexandr' для активации...")
                skip_wakeword = False
                continue

            # Check for alarm commands
            alarm_response = alarm_clock.parse_command(user_text)
            if alarm_response:
                speak(alarm_response)
                continue

            # Check for volume command (normalized text, like the stop check)
            if user_text_lower.startswith(_VOLUME_COMMAND):
                _handle_volume_command(user_text_lower)
                continue

            # Check for translation commands
            translation_request = parse_translation_request(user_text.strip())
            if translation_request:
                skip_wakeword = _handle_translation(translation_request)
                continue

            # Step 3: Send to AI.
            # The new question is sent together with the stored history but is
            # committed to the history only once a reply exists. Appending it
            # up front would leave an orphaned user turn behind whenever
            # ask_ai() fails, and on a full deque it would evict the oldest
            # user turn so that the conversation starts with an assistant
            # message.
            user_message = {"role": "user", "content": user_text}
            ai_response = ask_ai([*chat_history, user_message])

            if ai_response:
                chat_history.append(user_message)
                chat_history.append({"role": "assistant", "content": ai_response})

            # Step 4: Clean response
            clean_text = clean_response(ai_response, language="ru")

            # Step 5: Speak response; check_wakeword_once is passed as the
            # interrupt check
            completed = speak(
                clean_text, check_interrupt=check_wakeword_once, language="ru"
            )

            # Stop monitoring after TTS finishes
            stop_wakeword_monitoring()

            # Enable follow-up mode for next iteration (also when the answer
            # was interrupted: an interruption means a new command follows)
            skip_wakeword = True

            if not completed:
                print("⏹️ Ответ прерван - слушаю следующий вопрос")

            print()
            print("-" * 30)
            print()

            # Step 6: Loop continues with skip_wakeword=True

        except KeyboardInterrupt:
            signal_handler(None, None)
        except Exception as e:
            print(f"❌ Ошибка: {e}")
            skip_wakeword = False
            try:
                speak("Произошла ошибка. Попробуйте ещё раз.")
            except Exception as speak_error:
                # The failure may come from TTS itself; never let the error
                # announcement terminate the main loop.
                print(f"❌ Ошибка: {speak_error}")
|
||
|
||
|
||
# Start the assistant only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|