feat: improve semantic voice control and music playback

This commit is contained in:
2026-03-15 14:40:33 +03:00
parent e1a94c68db
commit cb54a9ee75
8 changed files with 1656 additions and 276 deletions

View File

@@ -33,7 +33,7 @@ from .audio.wakeword import (
from .audio.wakeword import (
stop_monitoring as stop_wakeword_monitoring,
)
from .core.ai import ask_ai_stream, translate_text
from .core.ai import ask_ai_stream, interpret_assistant_intent, translate_text
from .core.config import BASE_DIR, WAKE_WORD
from .core.cleaner import clean_response
from .core.commands import is_stop_command
@@ -163,6 +163,10 @@ _CITY_PATTERNS = [
),
]
# Confidence thresholds applied to the result of interpret_assistant_intent().
# Minimum confidence required before the recognised text is replaced by the
# interpreter's normalized command for the downstream command parsers.
_SEMANTIC_INTENT_MIN_CONFIDENCE = 0.55
# Minimum confidence for passing a "music" intent straight to the music
# controller's semantic-action handler (lower than the general threshold).
_SEMANTIC_MUSIC_MIN_CONFIDENCE = 0.45
# Minimum confidence for acting on a "stop" or "repeat" intent; the highest
# of the three thresholds.
_SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE = 0.72
def signal_handler(sig, frame):
"""Обработчик Ctrl+C."""
@@ -311,7 +315,7 @@ def main():
continue # Продолжаем цикл
else:
# Follow-up режим — без wake word
print(f"👂 Слушаю ({followup_idle_timeout_seconds:.0f} сек)...")
print(f"👂 Слушаю ({followup_idle_timeout_seconds:.1f} сек)...")
try:
user_text = listen(
timeout_seconds=7.0,
@@ -341,6 +345,11 @@ def main():
# Проверка на команду "Стоп"
if is_stop_command(user_text):
music_controller = get_music_controller()
music_stop_response = music_controller.pause_for_stop_word()
if music_stop_response:
print(f"🎵 {music_stop_response}")
if stopwatch_manager.has_running_stopwatches():
stopwatch_stop_response = stopwatch_manager.pause_stopwatches()
clean_stopwatch_stop_response = clean_response(
@@ -369,8 +378,93 @@ def main():
skip_wakeword = True
continue
effective_text = user_text
semantic_intent = interpret_assistant_intent(user_text)
semantic_type = str(semantic_intent.get("intent", "none")).strip().lower()
try:
semantic_confidence = float(
semantic_intent.get("confidence", 0.0) or 0.0
)
except (TypeError, ValueError):
semantic_confidence = 0.0
semantic_command = str(semantic_intent.get("normalized_command", "")).strip()
semantic_music_action = (
str(semantic_intent.get("music_action", "none")).strip().lower()
)
semantic_music_query = str(semantic_intent.get("music_query", "")).strip()
if (
semantic_type == "stop"
and semantic_confidence >= _SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE
):
music_controller = get_music_controller()
music_stop_response = music_controller.pause_for_stop_word()
if music_stop_response:
print(f"🎵 {music_stop_response}")
if stopwatch_manager.has_running_stopwatches():
stopwatch_stop_response = stopwatch_manager.pause_stopwatches()
clean_stopwatch_stop_response = clean_response(
stopwatch_stop_response, language="ru"
)
speak(clean_stopwatch_stop_response)
last_response = clean_stopwatch_stop_response
skip_wakeword = False
continue
print("_" * 50)
print(f"💤 Жду '{WAKE_WORD}'...")
skip_wakeword = False
continue
if (
semantic_type == "repeat"
and semantic_confidence >= _SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE
):
if last_response:
print(f"🔁 Повторяю: {last_response}")
speak(last_response)
else:
speak("Я еще ничего не говорил.")
skip_wakeword = True
continue
if (
semantic_type == "music"
and semantic_confidence >= _SEMANTIC_MUSIC_MIN_CONFIDENCE
):
music_controller = get_music_controller()
semantic_music_response = music_controller.handle_semantic_action(
semantic_music_action,
semantic_music_query,
)
if semantic_music_response:
clean_music_response = clean_response(
semantic_music_response, language="ru"
)
speak(clean_music_response)
last_response = clean_music_response
skip_wakeword = True
continue
if (
semantic_command
and semantic_confidence >= _SEMANTIC_INTENT_MIN_CONFIDENCE
and semantic_type
in {
"music",
"timer",
"alarm",
"weather",
"volume",
"translation",
"cities",
}
):
effective_text = semantic_command
print(f"🧠 Команда: '{user_text}' -> '{effective_text}'")
# Small-talk
smalltalk_response = get_smalltalk_response(user_text)
smalltalk_response = get_smalltalk_response(effective_text)
if smalltalk_response:
clean_smalltalk = clean_response(smalltalk_response, language="ru")
speak(clean_smalltalk)
@@ -378,7 +472,7 @@ def main():
skip_wakeword = True
continue
command_text = user_text
command_text = effective_text
command_text_lower = command_text.lower()
if pending_time_target == "timer" and "таймер" not in command_text_lower:
command_text = f"таймер {command_text}"
@@ -427,9 +521,9 @@ def main():
continue
# Громкость
if user_text.lower().startswith("громкость"):
if command_text.lower().startswith("громкость"):
try:
vol_str = user_text.lower().replace("громкость", "", 1).strip()
vol_str = command_text.lower().replace("громкость", "", 1).strip()
level = parse_volume_text(vol_str)
if level is not None:
@@ -455,7 +549,7 @@ def main():
# Погода
requested_city = None
user_text_lower = user_text.lower()
user_text_lower = command_text.lower()
for pattern in _CITY_PATTERNS:
match = pattern.search(user_text_lower)
@@ -487,7 +581,7 @@ def main():
# Музыка
music_controller = get_music_controller()
music_response = music_controller.parse_command(user_text)
music_response = music_controller.parse_command(command_text)
if music_response:
clean_music_response = clean_response(music_response, language="ru")
speak(clean_music_response)
@@ -496,7 +590,7 @@ def main():
continue
# Перевод
translation_request = parse_translation_request(user_text)
translation_request = parse_translation_request(command_text)
if translation_request:
source_lang = translation_request["source_lang"]
target_lang = translation_request["target_lang"]
@@ -553,8 +647,7 @@ def main():
continue
# Игра "Города"
cities_response = cities_game.handle(user_text)
cities_response = cities_game.handle(user_text)
cities_response = cities_game.handle(command_text)
if cities_response:
clean_cities_response = clean_response(cities_response, language="ru")
speak(clean_cities_response)