Second commit, before adding a memory function to the API query
This commit is contained in:
10
ai.py
10
ai.py
@@ -7,10 +7,12 @@ from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL
|
|||||||
|
|
||||||
|
|
||||||
# System prompt for the AI
|
# System prompt for the AI
|
||||||
SYSTEM_PROMPT = """Ты — голосовой ассистент умной колонки.
|
SYSTEM_PROMPT = """Ты — Александр, умный голосовой ассистент с человеческим поведением.
|
||||||
Отвечай кратко, по существу, на русском языке.
|
Веди себя как живой человек: будь дружелюбным, естественным и немного эмоциональным, где это уместно.
|
||||||
Избегай длинных списков и сложного форматирования.
|
Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
|
||||||
Твои ответы будут озвучены голосом, поэтому пиши естественным разговорным языком."""
|
Отвечай кратко и по существу, на русском языке.
|
||||||
|
Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
|
||||||
|
Пиши в разговорном стиле, как при живом общении."""
|
||||||
|
|
||||||
|
|
||||||
def ask_ai(user_message: str) -> str:
|
def ask_ai(user_message: str) -> str:
|
||||||
|
|||||||
41
main.py
41
main.py
@@ -10,11 +10,12 @@ Flow:
|
|||||||
5. Speak response (TTS)
|
5. Speak response (TTS)
|
||||||
6. Loop back to step 1
|
6. Loop back to step 1
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
||||||
from stt import listen, cleanup as cleanup_stt
|
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
||||||
from ai import ask_ai
|
from ai import ask_ai
|
||||||
from cleaner import clean_response
|
from cleaner import clean_response
|
||||||
from tts import speak, initialize as init_tts
|
from tts import speak, initialize as init_tts
|
||||||
@@ -42,24 +43,35 @@ def main():
|
|||||||
# Setup signal handler for graceful exit
|
# Setup signal handler for graceful exit
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
# Pre-initialize TTS model (takes a few seconds)
|
# Pre-initialize models (takes a few seconds)
|
||||||
print("⏳ Инициализация...")
|
print("⏳ Инициализация моделей...")
|
||||||
init_tts()
|
get_recognizer().initialize() # Initialize STT model first
|
||||||
|
init_tts() # Then initialize TTS model
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# Main loop
|
# Main loop
|
||||||
skip_wakeword = False
|
skip_wakeword = False
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# Step 1: Wait for wake word
|
# Step 1: Wait for the wake word, or listen for a follow-up
|
||||||
if not skip_wakeword:
|
if not skip_wakeword:
|
||||||
wait_for_wakeword()
|
wait_for_wakeword()
|
||||||
|
# Standard listen after activation
|
||||||
|
user_text = listen(timeout_seconds=7.0)
|
||||||
|
else:
|
||||||
|
# Follow-up listen (wait up to 2.0s for speech to start, then listen for up to 20s)
|
||||||
|
print("👂 Слушаю продолжение диалога...")
|
||||||
|
user_text = listen(timeout_seconds=20.0, detection_timeout=2.0)
|
||||||
|
|
||||||
|
if not user_text:
|
||||||
|
# User didn't continue conversation, go back to sleep
|
||||||
|
skip_wakeword = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Reset flag for now (set back to True after the AI response is spoken, even if it was interrupted)
|
||||||
skip_wakeword = False
|
skip_wakeword = False
|
||||||
|
|
||||||
# Step 2: Listen to user speech
|
# Step 2: Check if speech was recognized
|
||||||
user_text = listen(timeout_seconds=7.0)
|
|
||||||
|
|
||||||
if not user_text:
|
if not user_text:
|
||||||
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
||||||
continue
|
continue
|
||||||
@@ -79,7 +91,9 @@ def main():
|
|||||||
else:
|
else:
|
||||||
speak("Не удалось установить громкость.")
|
speak("Не удалось установить громкость.")
|
||||||
else:
|
else:
|
||||||
speak("Я не понял число громкости. Скажите число от одного до десяти.")
|
speak(
|
||||||
|
"Я не понял число громкости. Скажите число от одного до десяти."
|
||||||
|
)
|
||||||
|
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -96,17 +110,20 @@ def main():
|
|||||||
# Step 5: Speak response (with wake word interrupt support)
|
# Step 5: Speak response (with wake word interrupt support)
|
||||||
completed = speak(clean_text, check_interrupt=check_wakeword_once)
|
completed = speak(clean_text, check_interrupt=check_wakeword_once)
|
||||||
|
|
||||||
# If interrupted by wake word, go back to waiting for wake word
|
# Enable follow-up mode for next iteration
|
||||||
|
skip_wakeword = True
|
||||||
|
|
||||||
|
# If interrupted by the wake word, we still want to skip the wake word next time (skip_wakeword is set above),
|
||||||
|
# but we can print a message
|
||||||
if not completed:
|
if not completed:
|
||||||
print("⏹️ Ответ прерван - слушаю следующий вопрос")
|
print("⏹️ Ответ прерван - слушаю следующий вопрос")
|
||||||
skip_wakeword = True
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("-" * 30)
|
print("-" * 30)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# Step 6: Loop continues...
|
# Step 6: Loop continues with skip_wakeword=True
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
signal_handler(None, None)
|
signal_handler(None, None)
|
||||||
|
|||||||
19
stt.py
19
stt.py
@@ -34,12 +34,13 @@ class SpeechRecognizer:
|
|||||||
)
|
)
|
||||||
print("✅ Модель Vosk загружена")
|
print("✅ Модель Vosk загружена")
|
||||||
|
|
||||||
def listen(self, timeout_seconds: float = 5.0) -> str:
|
def listen(self, timeout_seconds: float = 5.0, detection_timeout: float = None) -> str:
|
||||||
"""
|
"""
|
||||||
Listen to microphone and transcribe speech.
|
Listen to microphone and transcribe speech.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
timeout_seconds: Maximum time to listen for speech
|
timeout_seconds: Maximum time to listen for speech
|
||||||
|
detection_timeout: Maximum time in seconds to wait for speech to start before giving up. If None (or 0), falls back to timeout_seconds.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Transcribed text from speech
|
Transcribed text from speech
|
||||||
@@ -53,10 +54,13 @@ class SpeechRecognizer:
|
|||||||
self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE)
|
self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE)
|
||||||
|
|
||||||
frames_to_read = int(SAMPLE_RATE * timeout_seconds / 4096)
|
frames_to_read = int(SAMPLE_RATE * timeout_seconds / 4096)
|
||||||
|
detection_frames = int(SAMPLE_RATE * detection_timeout / 4096) if detection_timeout else frames_to_read
|
||||||
|
|
||||||
silence_frames = 0
|
silence_frames = 0
|
||||||
max_silence_frames = 10 # About 2.5 seconds of silence
|
max_silence_frames = 10 # About 2.5 seconds of silence
|
||||||
|
speech_started = False
|
||||||
|
|
||||||
for _ in range(frames_to_read):
|
for i in range(frames_to_read):
|
||||||
data = self.stream.read(4096, exception_on_overflow=False)
|
data = self.stream.read(4096, exception_on_overflow=False)
|
||||||
|
|
||||||
if self.recognizer.AcceptWaveform(data):
|
if self.recognizer.AcceptWaveform(data):
|
||||||
@@ -71,9 +75,14 @@ class SpeechRecognizer:
|
|||||||
partial = json.loads(self.recognizer.PartialResult())
|
partial = json.loads(self.recognizer.PartialResult())
|
||||||
if partial.get("partial", ""):
|
if partial.get("partial", ""):
|
||||||
silence_frames = 0
|
silence_frames = 0
|
||||||
|
speech_started = True
|
||||||
else:
|
else:
|
||||||
silence_frames += 1
|
silence_frames += 1
|
||||||
|
|
||||||
|
# Check detection timeout
|
||||||
|
if not speech_started and i > detection_frames:
|
||||||
|
break
|
||||||
|
|
||||||
# Stop if too much silence after speech
|
# Stop if too much silence after speech
|
||||||
if silence_frames > max_silence_frames:
|
if silence_frames > max_silence_frames:
|
||||||
break
|
break
|
||||||
@@ -85,6 +94,8 @@ class SpeechRecognizer:
|
|||||||
if text:
|
if text:
|
||||||
print(f"📝 Распознано: {text}")
|
print(f"📝 Распознано: {text}")
|
||||||
else:
|
else:
|
||||||
|
# Only print if we weren't just checking for presence of speech
|
||||||
|
if not detection_timeout or speech_started:
|
||||||
print("⚠️ Речь не распознана")
|
print("⚠️ Речь не распознана")
|
||||||
|
|
||||||
return text
|
return text
|
||||||
@@ -109,9 +120,9 @@ def get_recognizer() -> SpeechRecognizer:
|
|||||||
return _recognizer
|
return _recognizer
|
||||||
|
|
||||||
|
|
||||||
def listen(timeout_seconds: float = 5.0) -> str:
|
def listen(timeout_seconds: float = 5.0, detection_timeout: float = None) -> str:
|
||||||
"""Listen to microphone and return transcribed text."""
|
"""Listen to microphone and return transcribed text."""
|
||||||
return get_recognizer().listen(timeout_seconds)
|
return get_recognizer().listen(timeout_seconds, detection_timeout)
|
||||||
|
|
||||||
|
|
||||||
def cleanup():
|
def cleanup():
|
||||||
|
|||||||
Reference in New Issue
Block a user