feat: improve semantic voice control and music playback
This commit is contained in:
115
app/main.py
115
app/main.py
@@ -33,7 +33,7 @@ from .audio.wakeword import (
|
||||
from .audio.wakeword import (
|
||||
stop_monitoring as stop_wakeword_monitoring,
|
||||
)
|
||||
from .core.ai import ask_ai_stream, translate_text
|
||||
from .core.ai import ask_ai_stream, interpret_assistant_intent, translate_text
|
||||
from .core.config import BASE_DIR, WAKE_WORD
|
||||
from .core.cleaner import clean_response
|
||||
from .core.commands import is_stop_command
|
||||
@@ -163,6 +163,10 @@ _CITY_PATTERNS = [
|
||||
),
|
||||
]
|
||||
|
||||
_SEMANTIC_INTENT_MIN_CONFIDENCE = 0.55
|
||||
_SEMANTIC_MUSIC_MIN_CONFIDENCE = 0.45
|
||||
_SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE = 0.72
|
||||
|
||||
|
||||
def signal_handler(sig, frame):
|
||||
"""Обработчик Ctrl+C."""
|
||||
@@ -311,7 +315,7 @@ def main():
|
||||
continue # Продолжаем цикл
|
||||
else:
|
||||
# Follow-up режим — без wake word
|
||||
print(f"👂 Слушаю ({followup_idle_timeout_seconds:.0f} сек)...")
|
||||
print(f"👂 Слушаю ({followup_idle_timeout_seconds:.1f} сек)...")
|
||||
try:
|
||||
user_text = listen(
|
||||
timeout_seconds=7.0,
|
||||
@@ -341,6 +345,11 @@ def main():
|
||||
|
||||
# Проверка на команду "Стоп"
|
||||
if is_stop_command(user_text):
|
||||
music_controller = get_music_controller()
|
||||
music_stop_response = music_controller.pause_for_stop_word()
|
||||
if music_stop_response:
|
||||
print(f"🎵 {music_stop_response}")
|
||||
|
||||
if stopwatch_manager.has_running_stopwatches():
|
||||
stopwatch_stop_response = stopwatch_manager.pause_stopwatches()
|
||||
clean_stopwatch_stop_response = clean_response(
|
||||
@@ -369,8 +378,93 @@ def main():
|
||||
skip_wakeword = True
|
||||
continue
|
||||
|
||||
effective_text = user_text
|
||||
semantic_intent = interpret_assistant_intent(user_text)
|
||||
semantic_type = str(semantic_intent.get("intent", "none")).strip().lower()
|
||||
try:
|
||||
semantic_confidence = float(
|
||||
semantic_intent.get("confidence", 0.0) or 0.0
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
semantic_confidence = 0.0
|
||||
semantic_command = str(semantic_intent.get("normalized_command", "")).strip()
|
||||
semantic_music_action = (
|
||||
str(semantic_intent.get("music_action", "none")).strip().lower()
|
||||
)
|
||||
semantic_music_query = str(semantic_intent.get("music_query", "")).strip()
|
||||
|
||||
if (
|
||||
semantic_type == "stop"
|
||||
and semantic_confidence >= _SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE
|
||||
):
|
||||
music_controller = get_music_controller()
|
||||
music_stop_response = music_controller.pause_for_stop_word()
|
||||
if music_stop_response:
|
||||
print(f"🎵 {music_stop_response}")
|
||||
|
||||
if stopwatch_manager.has_running_stopwatches():
|
||||
stopwatch_stop_response = stopwatch_manager.pause_stopwatches()
|
||||
clean_stopwatch_stop_response = clean_response(
|
||||
stopwatch_stop_response, language="ru"
|
||||
)
|
||||
speak(clean_stopwatch_stop_response)
|
||||
last_response = clean_stopwatch_stop_response
|
||||
skip_wakeword = False
|
||||
continue
|
||||
print("_" * 50)
|
||||
print(f"💤 Жду '{WAKE_WORD}'...")
|
||||
skip_wakeword = False
|
||||
continue
|
||||
|
||||
if (
|
||||
semantic_type == "repeat"
|
||||
and semantic_confidence >= _SEMANTIC_REPEAT_STOP_MIN_CONFIDENCE
|
||||
):
|
||||
if last_response:
|
||||
print(f"🔁 Повторяю: {last_response}")
|
||||
speak(last_response)
|
||||
else:
|
||||
speak("Я еще ничего не говорил.")
|
||||
skip_wakeword = True
|
||||
continue
|
||||
|
||||
if (
|
||||
semantic_type == "music"
|
||||
and semantic_confidence >= _SEMANTIC_MUSIC_MIN_CONFIDENCE
|
||||
):
|
||||
music_controller = get_music_controller()
|
||||
semantic_music_response = music_controller.handle_semantic_action(
|
||||
semantic_music_action,
|
||||
semantic_music_query,
|
||||
)
|
||||
if semantic_music_response:
|
||||
clean_music_response = clean_response(
|
||||
semantic_music_response, language="ru"
|
||||
)
|
||||
speak(clean_music_response)
|
||||
last_response = clean_music_response
|
||||
skip_wakeword = True
|
||||
continue
|
||||
|
||||
if (
|
||||
semantic_command
|
||||
and semantic_confidence >= _SEMANTIC_INTENT_MIN_CONFIDENCE
|
||||
and semantic_type
|
||||
in {
|
||||
"music",
|
||||
"timer",
|
||||
"alarm",
|
||||
"weather",
|
||||
"volume",
|
||||
"translation",
|
||||
"cities",
|
||||
}
|
||||
):
|
||||
effective_text = semantic_command
|
||||
print(f"🧠 Команда: '{user_text}' -> '{effective_text}'")
|
||||
|
||||
# Small-talk
|
||||
smalltalk_response = get_smalltalk_response(user_text)
|
||||
smalltalk_response = get_smalltalk_response(effective_text)
|
||||
if smalltalk_response:
|
||||
clean_smalltalk = clean_response(smalltalk_response, language="ru")
|
||||
speak(clean_smalltalk)
|
||||
@@ -378,7 +472,7 @@ def main():
|
||||
skip_wakeword = True
|
||||
continue
|
||||
|
||||
command_text = user_text
|
||||
command_text = effective_text
|
||||
command_text_lower = command_text.lower()
|
||||
if pending_time_target == "timer" and "таймер" not in command_text_lower:
|
||||
command_text = f"таймер {command_text}"
|
||||
@@ -427,9 +521,9 @@ def main():
|
||||
continue
|
||||
|
||||
# Громкость
|
||||
if user_text.lower().startswith("громкость"):
|
||||
if command_text.lower().startswith("громкость"):
|
||||
try:
|
||||
vol_str = user_text.lower().replace("громкость", "", 1).strip()
|
||||
vol_str = command_text.lower().replace("громкость", "", 1).strip()
|
||||
level = parse_volume_text(vol_str)
|
||||
|
||||
if level is not None:
|
||||
@@ -455,7 +549,7 @@ def main():
|
||||
|
||||
# Погода
|
||||
requested_city = None
|
||||
user_text_lower = user_text.lower()
|
||||
user_text_lower = command_text.lower()
|
||||
|
||||
for pattern in _CITY_PATTERNS:
|
||||
match = pattern.search(user_text_lower)
|
||||
@@ -487,7 +581,7 @@ def main():
|
||||
|
||||
# Музыка
|
||||
music_controller = get_music_controller()
|
||||
music_response = music_controller.parse_command(user_text)
|
||||
music_response = music_controller.parse_command(command_text)
|
||||
if music_response:
|
||||
clean_music_response = clean_response(music_response, language="ru")
|
||||
speak(clean_music_response)
|
||||
@@ -496,7 +590,7 @@ def main():
|
||||
continue
|
||||
|
||||
# Перевод
|
||||
translation_request = parse_translation_request(user_text)
|
||||
translation_request = parse_translation_request(command_text)
|
||||
if translation_request:
|
||||
source_lang = translation_request["source_lang"]
|
||||
target_lang = translation_request["target_lang"]
|
||||
@@ -553,8 +647,7 @@ def main():
|
||||
continue
|
||||
|
||||
# Игра "Города"
|
||||
cities_response = cities_game.handle(user_text)
|
||||
cities_response = cities_game.handle(user_text)
|
||||
cities_response = cities_game.handle(command_text)
|
||||
if cities_response:
|
||||
clean_cities_response = clean_response(cities_response, language="ru")
|
||||
speak(clean_cities_response)
|
||||
|
||||
Reference in New Issue
Block a user