Second commit: snapshot before adding a memory function to the API query

2026-01-04 20:08:49 +03:00
parent 51ed78078b
commit 1b4d46e387
3 changed files with 67 additions and 37 deletions
--- a/ai.py
+++ b/ai.py
@@ -7,10 +7,12 @@ from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL


 # System prompt for the AI
-SYSTEM_PROMPT = """Ты — голосовой ассистент умной колонки. 
-Отвечай кратко, по существу, на русском языке.
-Избегай длинных списков и сложного форматирования.
-Твои ответы будут озвучены голосом, поэтому пиши естественным разговорным языком."""
+SYSTEM_PROMPT = """Ты — Александр, умный голосовой ассистент с человеческим поведением.
+Веди себя как живой человек: будь дружелюбным, естественным и немного эмоциональным, где это уместно.
+Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
+Отвечай кратко и по существу, на русском языке.
+Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
+Пиши в разговорном стиле, как при живом общении."""


 def ask_ai(user_message: str) -> str:
--- a/main.py
+++ b/main.py
@@ -10,11 +10,12 @@ Flow:
 5. Speak response (TTS)
 6. Loop back to step 1
 """
+
 import signal
 import sys

 from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
-from stt import listen, cleanup as cleanup_stt
+from stt import listen, cleanup as cleanup_stt, get_recognizer
 from ai import ask_ai
 from cleaner import clean_response
 from tts import speak, initialize as init_tts
@@ -42,24 +43,35 @@ def main():
    # Setup signal handler for graceful exit
    signal.signal(signal.SIGINT, signal_handler)

-    # Pre-initialize TTS model (takes a few seconds)
-    print("⏳ Инициализация...")
-    init_tts()
+    # Pre-initialize models (takes a few seconds)
+    print("⏳ Инициализация моделей...")
+    get_recognizer().initialize()  # Initialize STT model first
+    init_tts()  # Then initialize TTS model
    print()

    # Main loop
    skip_wakeword = False
    while True:
        try:
-            # Step 1: Wait for wake word
+            # Step 1: Wait for wake word or Follow-up listen
            if not skip_wakeword:
                wait_for_wakeword()
+                # Standard listen after activation
+                user_text = listen(timeout_seconds=7.0)
+            else:
+                # Follow-up listen: wait up to 2.0 s for speech to start, then listen for up to 20 s
+                print("👂 Слушаю продолжение диалога...")
+                user_text = listen(timeout_seconds=20.0, detection_timeout=2.0)
                
+                if not user_text:
+                    # User didn't continue conversation, go back to sleep
+                    skip_wakeword = False
+                    continue
+
+            # Reset the flag for now; it is set back to True once the response has been spoken (Step 5)
            skip_wakeword = False

-            # Step 2: Listen to user speech
-            user_text = listen(timeout_seconds=7.0)
-            
+            # Step 2: Check if speech was recognized
            if not user_text:
                speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
                continue
@@ -79,7 +91,9 @@ def main():
                        else:
                            speak("Не удалось установить громкость.")
                    else:
-                        speak("Я не понял число громкости. Скажите число от одного до десяти.")
+                        speak(
+                            "Я не понял число громкости. Скажите число от одного до десяти."
+                        )

                    continue
                except Exception as e:
@@ -96,17 +110,20 @@ def main():
            # Step 5: Speak response (with wake word interrupt support)
            completed = speak(clean_text, check_interrupt=check_wakeword_once)

-            # If interrupted by wake word, go back to waiting for wake word
+            # Enable follow-up mode for next iteration
+            skip_wakeword = True
+
+            # If the response was interrupted by the wake word, skip_wakeword stays True (set above);
+            # we only print a notice before moving on to the next iteration
            if not completed:
                print("⏹️ Ответ прерван - слушаю следующий вопрос")
-                skip_wakeword = True
                continue

            print()
            print("-" * 30)
            print()

-            # Step 6: Loop continues...
+            # Step 6: Loop continues with skip_wakeword=True

        except KeyboardInterrupt:
            signal_handler(None, None)
--- a/stt.py
+++ b/stt.py
@@ -34,12 +34,13 @@ class SpeechRecognizer:
        )
        print("✅ Модель Vosk загружена")
    
-    def listen(self, timeout_seconds: float = 5.0) -> str:
+    def listen(self, timeout_seconds: float = 5.0, detection_timeout: float = None) -> str:
        """
        Listen to microphone and transcribe speech.
        
        Args:
            timeout_seconds: Maximum time to listen for speech
+            detection_timeout: Time to wait for speech to start. If None, uses timeout_seconds.
            
        Returns:
            Transcribed text from speech
@@ -53,10 +54,13 @@ class SpeechRecognizer:
        self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE)
        
        frames_to_read = int(SAMPLE_RATE * timeout_seconds / 4096)
+        detection_frames = int(SAMPLE_RATE * detection_timeout / 4096) if detection_timeout else frames_to_read
+        
        silence_frames = 0
        max_silence_frames = 10  # About 2.5 seconds of silence
+        speech_started = False
        
-        for _ in range(frames_to_read):
+        for i in range(frames_to_read):
            data = self.stream.read(4096, exception_on_overflow=False)
            
            if self.recognizer.AcceptWaveform(data):
@@ -71,9 +75,14 @@ class SpeechRecognizer:
                partial = json.loads(self.recognizer.PartialResult())
                if partial.get("partial", ""):
                    silence_frames = 0
+                    speech_started = True
                else:
                    silence_frames += 1
            
+            # Check detection timeout
+            if not speech_started and i > detection_frames:
+                break
+            
            # Stop if too much silence after speech
            if silence_frames > max_silence_frames:
                break
@@ -85,7 +94,9 @@ class SpeechRecognizer:
        if text:
            print(f"📝 Распознано: {text}")
        else:
-            print("⚠️ Речь не распознана")
+            # Only print if we weren't just checking for presence of speech
+            if not detection_timeout or speech_started:
+                print("⚠️ Речь не распознана")
        
        return text
    
@@ -109,9 +120,9 @@ def get_recognizer() -> SpeechRecognizer:
    return _recognizer


-def listen(timeout_seconds: float = 5.0) -> str:
+def listen(timeout_seconds: float = 5.0, detection_timeout: float = None) -> str:
    """Listen to microphone and return transcribed text."""
-    return get_recognizer().listen(timeout_seconds)
+    return get_recognizer().listen(timeout_seconds, detection_timeout)


 def cleanup():