Files
smart-speaker/main.py

291 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Smart Speaker - Main Application
Voice assistant with wake word detection, STT, AI and TTS.
Flow:
1. Wait for wake word ("Alexandr")
2. Listen to user speech (STT)
3. Send query to AI (Perplexity)
4. Clean response from markdown
5. Speak response (TTS)
6. Loop back to step 1
"""
import signal
import sys
import re
import threading
from collections import deque
from wakeword import (
wait_for_wakeword,
cleanup as cleanup_wakeword,
check_wakeword_once,
stop_monitoring as stop_wakeword_monitoring,
)
from stt import listen, cleanup as cleanup_stt, get_recognizer
from ai import ask_ai, translate_text
from cleaner import clean_response
from tts import speak, initialize as init_tts
from sound_level import set_volume, parse_volume_text
from alarm import get_alarm_clock
def signal_handler(sig, frame):
    """Shut the assistant down cleanly on SIGINT (Ctrl+C).

    Prints a farewell message, runs the wake-word and STT cleanup
    functions in that order, and terminates the process with exit code 0.

    Args:
        sig: Signal number passed by the ``signal`` module (unused).
        frame: Interrupted stack frame (unused).
    """
    print("\n\n👋 Завершение работы...")
    # Release the wake-word resources first, then the speech-to-text ones.
    for release in (cleanup_wakeword, cleanup_stt):
        release()
    sys.exit(0)
# Translation command table: (compiled pattern, source language, target language).
# Each pattern is anchored at the start of the phrase and captures whatever
# follows the command as the text to translate (possibly empty).
# Compiled once at import time instead of on every call.
_TRANSLATION_PATTERNS = tuple(
    (re.compile(pattern, flags=re.IGNORECASE), source_lang, target_lang)
    for pattern, source_lang, target_lang in (
        (r"^переведи на английский\s*(.*)$", "ru", "en"),
        (r"^переведи на русский\s*(.*)$", "en", "ru"),
        (r"^переведи с английского\s*(.*)$", "en", "ru"),
        (r"^переведи с русского\s*(.*)$", "ru", "en"),
        (r"^как по[-\s]?английски\s*(.*)$", "ru", "en"),
        (r"^как по[-\s]?русски\s*(.*)$", "en", "ru"),
        (r"^translate (?:to|into) english\s*(.*)$", "ru", "en"),
        (r"^translate (?:to|into) russian\s*(.*)$", "en", "ru"),
        (r"^translate from english\s*(.*)$", "en", "ru"),
        (r"^translate from russian\s*(.*)$", "ru", "en"),
    )
)


def parse_translation_request(text: str):
    """Detect a translation command and extract its direction and payload.

    Matching is case-insensitive and ignores leading/trailing whitespace
    around the whole phrase, so a transcript such as
    ``"  Переведи на английский привет"`` is still recognised.

    Args:
        text: Recognised user phrase. Empty or ``None`` input is treated as
            "no command".

    Returns:
        A dict with keys ``source_lang``, ``target_lang`` and ``text`` (the
        phrase to translate, which is ``""`` when the user only said the
        command itself), or ``None`` if the phrase is not a translation
        command.
    """
    if not text:
        return None

    # The patterns are anchored with ``^``; without trimming, a single
    # leading space would make every command unrecognisable.
    phrase = text.strip()
    for pattern, source_lang, target_lang in _TRANSLATION_PATTERNS:
        match = pattern.match(phrase)
        if match:
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": match.group(1).strip(),
            }
    return None
# Phrases that end the current dialogue and return to wake-word mode.
_STOP_COMMANDS = frozenset({"стоп", "александр", "стоп александр", "хватит"})

# Spoken prefix that introduces a volume command.
_VOLUME_PREFIX = "громкость"

# Chat history window: last 10 exchanges = 20 messages.
_MAX_HISTORY_MESSAGES = 20


def _initialize_models():
    """Initialize the STT and TTS models in parallel; re-raise the first failure."""
    init_errors = []

    def run_guarded(initializer):
        # An exception raised inside a worker thread never reaches the
        # caller, so collect it and surface it after both threads finish.
        try:
            initializer()
        except Exception as e:
            init_errors.append(e)

    def init_stt():
        get_recognizer().initialize()

    workers = [
        threading.Thread(target=run_guarded, args=(init_stt,), daemon=True),
        threading.Thread(target=run_guarded, args=(init_tts,), daemon=True),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    if init_errors:
        raise init_errors[0]


def _handle_volume_command(command_text):
    """Parse and apply a volume command, speaking the outcome.

    Args:
        command_text: Lower-cased, stripped user phrase that starts with the
            volume prefix.
    """
    try:
        # Drop the leading command word; the remainder should be the level.
        vol_str = command_text.replace(_VOLUME_PREFIX, "", 1).strip()
        level = parse_volume_text(vol_str)
        if level is None:
            speak("Я не понял число громкости. Скажите число от одного до десяти.")
        elif set_volume(level):
            speak(f"Громкость установлена на {level}")
        else:
            speak("Не удалось установить громкость.")
    except Exception as e:
        print(f"❌ Ошибка громкости: {e}")
        speak("Не удалось изменить громкость.")


def _handle_translation(request):
    """Carry out one translation command and speak the result.

    Args:
        request: Dict produced by ``parse_translation_request`` with the keys
            ``source_lang``, ``target_lang`` and ``text``.

    Returns:
        True if a translation was spoken (the caller should enter follow-up
        mode), False if no text to translate was heard (the caller should go
        back to waiting for the wake word).
    """
    source_lang = request["source_lang"]
    target_lang = request["target_lang"]
    text_to_translate = request["text"]

    if not text_to_translate:
        # The user said only the command: ask for the phrase separately and
        # listen for it in the source language.
        prompt = (
            "Скажи фразу на английском."
            if source_lang == "en"
            else "Скажи фразу на русском."
        )
        speak(prompt)
        text_to_translate = listen(
            timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
        )
        if not text_to_translate:
            speak("Я не расслышал текст для перевода.")
            return False

    translated_text = translate_text(text_to_translate, source_lang, target_lang)
    clean_text = clean_response(translated_text, language=target_lang)
    completed = speak(
        clean_text,
        check_interrupt=check_wakeword_once,
        language=target_lang,
    )
    # Close whatever stream check_wakeword_once opened during playback.
    stop_wakeword_monitoring()
    if not completed:
        print("⏹️ Перевод прерван - слушаю следующий вопрос")
    return True


def main():
    """Run the assistant: initialize the models, then loop over dialogue turns.

    Each loop iteration:
      1. Checks alarms.
      2. Waits for the wake word, or, in follow-up mode, listens right away.
      3. Dispatches the recognised phrase: stop words, alarm commands, volume
         commands, translation commands, otherwise a question for the AI.
      4. Speaks the answer; playback can be interrupted by the wake word.

    After a spoken AI answer or translation the loop enters follow-up mode
    (``skip_wakeword``), in which the next phrase is accepted without the
    wake word. Any unexpected error is reported aloud and resets the loop to
    wake-word mode.

    Raises:
        Exception: the first error raised while initializing the STT or TTS
            model.
    """
    print("=" * 50)
    print("🔊 УМНАЯ КОЛОНКА")
    print("=" * 50)
    print("Скажите 'Alexandr' для активации")
    print("Нажмите Ctrl+C для выхода")
    print("=" * 50)
    print()

    # Setup signal handler for graceful exit
    signal.signal(signal.SIGINT, signal_handler)

    # Pre-initialize models (takes a few seconds)
    print("⏳ Инициализация моделей...")
    _initialize_models()

    alarm_clock = get_alarm_clock()
    print()

    chat_history = deque(maxlen=_MAX_HISTORY_MESSAGES)

    # False: wait for the wake word. True: follow-up mode, listen right away.
    skip_wakeword = False

    while True:
        try:
            # Ensure wake word detector stream is closed before listening
            stop_wakeword_monitoring()

            # Alarms are polled on every iteration. A truthy result is
            # treated as "an alarm fired and has been handled", so the
            # dialogue state is reset.
            if alarm_clock.check_alarms():
                skip_wakeword = False
                continue

            # Step 1: obtain the user's phrase.
            if skip_wakeword:
                # Follow-up listen: the user gets 5 s to start speaking.
                print("👂 Слушаю продолжение диалога (5 сек)...")
                user_text = listen(timeout_seconds=10.0, detection_timeout=5.0)
                if not user_text:
                    # No continuation: go back to sleep silently.
                    skip_wakeword = False
                    continue
            else:
                # Short timeout so the loop comes back around to the alarm
                # check regularly.
                if not wait_for_wakeword(timeout=1.0):
                    continue
                user_text = listen(timeout_seconds=7.0)
                if not user_text:
                    # Activated by the wake word but nothing was recognised.
                    speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
                    skip_wakeword = False
                    continue

            # Step 2: built-in commands, matched on the normalised phrase.
            user_text_lower = user_text.lower().strip()

            if user_text_lower in _STOP_COMMANDS:
                print("_" * 50)
                print("💤 Жду 'Alexandr' для активации...")
                skip_wakeword = False
                continue

            alarm_response = alarm_clock.parse_command(user_text)
            if alarm_response:
                speak(alarm_response)
                continue

            # Uses the stripped phrase so stray leading whitespace in the
            # transcript does not hide the command.
            if user_text_lower.startswith(_VOLUME_PREFIX):
                _handle_volume_command(user_text_lower)
                continue

            translation_request = parse_translation_request(user_text)
            if translation_request:
                skip_wakeword = _handle_translation(translation_request)
                continue

            # Step 3: Send to AI.
            # The new exchange is committed to the history only after the AI
            # has answered. If ask_ai raises, the history stays as complete
            # user/assistant pairs instead of keeping an unanswered question
            # that would be resent with every later request.
            user_message = {"role": "user", "content": user_text}
            pending = [*chat_history, user_message][-_MAX_HISTORY_MESSAGES:]
            ai_response = ask_ai(pending)
            chat_history.append(user_message)
            chat_history.append({"role": "assistant", "content": ai_response})

            # Step 4: Clean response
            clean_text = clean_response(ai_response, language="ru")

            # Step 5: Speak response (with wake word interrupt support)
            completed = speak(
                clean_text, check_interrupt=check_wakeword_once, language="ru"
            )
            # Close whatever stream check_wakeword_once opened during playback.
            stop_wakeword_monitoring()

            # Step 6: the next iteration listens without the wake word. An
            # interrupted answer is handled the same way: the interruption
            # is taken to mean the user has a new request.
            skip_wakeword = True
            if not completed:
                print("⏹️ Ответ прерван - слушаю следующий вопрос")

            print()
            print("-" * 30)
            print()
        except KeyboardInterrupt:
            # Fallback path: SIGINT is normally routed straight to
            # signal_handler by the registration above.
            signal_handler(None, None)
        except Exception as e:
            print(f"❌ Ошибка: {e}")
            speak("Произошла ошибка. Попробуйте ещё раз.")
            skip_wakeword = False
# Start the assistant only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()