From 1b4d46e3870eb5b2fd3765c22c3926da97d4d9bb Mon Sep 17 00:00:00 2001
From: nvfuture <nearkaido@gmail.com>
Date: Sun, 4 Jan 2026 20:08:49 +0300
Subject: [PATCH] Second commit: snapshot before adding a memory function to the API query

---
 ai.py   | 10 ++++----
 main.py | 73 +++++++++++++++++++++++++++++++++++----------------------
 stt.py  | 21 +++++++++++++----
 3 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/ai.py b/ai.py
index ad56e23..34daea0 100644
--- a/ai.py
+++ b/ai.py
@@ -7,10 +7,12 @@ from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL
 
 
 # System prompt for the AI
-SYSTEM_PROMPT = """Ты — голосовой ассистент умной колонки. 
-Отвечай кратко, по существу, на русском языке.
-Избегай длинных списков и сложного форматирования.
-Твои ответы будут озвучены голосом, поэтому пиши естественным разговорным языком."""
+SYSTEM_PROMPT = """Ты — Александр, умный голосовой ассистент с человеческим поведением.
+Веди себя как живой человек: будь дружелюбным, естественным и немного эмоциональным, где это уместно.
+Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
+Отвечай кратко и по существу, на русском языке.
+Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
+Пиши в разговорном стиле, как при живом общении."""
 
 
 def ask_ai(user_message: str) -> str:
diff --git a/main.py b/main.py
index bf646fc..149789e 100644
--- a/main.py
+++ b/main.py
@@ -10,11 +10,12 @@ Flow:
 5. Speak response (TTS)
 6. Loop back to step 1
 """
+
 import signal
 import sys
 
 from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
-from stt import listen, cleanup as cleanup_stt
+from stt import listen, cleanup as cleanup_stt, get_recognizer
 from ai import ask_ai
 from cleaner import clean_response
 from tts import speak, initialize as init_tts
@@ -38,76 +39,92 @@ def main():
     print("Нажмите Ctrl+C для выхода")
     print("=" * 50)
     print()
-    
+
     # Setup signal handler for graceful exit
     signal.signal(signal.SIGINT, signal_handler)
-    
-    # Pre-initialize TTS model (takes a few seconds)
-    print("⏳ Инициализация...")
-    init_tts()
+
+    # Pre-initialize models (takes a few seconds)
+    print("⏳ Инициализация моделей...")
+    get_recognizer().initialize()  # Initialize STT model first
+    init_tts()  # Then initialize TTS model
     print()
-    
+
     # Main loop
     skip_wakeword = False
     while True:
         try:
-            # Step 1: Wait for wake word
+            # Step 1: Wait for the wake word, or listen for a follow-up
             if not skip_wakeword:
                 wait_for_wakeword()
-            
+                # Standard listen after activation
+                user_text = listen(timeout_seconds=7.0)
+            else:
+                # Follow-up listen: wait up to 2.0s for speech to start, then allow up to 20s
+                print("👂 Слушаю продолжение диалога...")
+                user_text = listen(timeout_seconds=20.0, detection_timeout=2.0)
+                
+                if not user_text:
+                    # User didn't continue conversation, go back to sleep
+                    skip_wakeword = False
+                    continue
+
+            # Reset flag for now (set back to True after the AI response is spoken, even if interrupted)
             skip_wakeword = False
-            
-            # Step 2: Listen to user speech
-            user_text = listen(timeout_seconds=7.0)
-            
+
+            # Step 2: Check if speech was recognized
             if not user_text:
                 speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
                 continue
-            
+
             # Check for volume command
             if user_text.lower().startswith("громкость"):
                 try:
                     # Remove "громкость" prefix and strip whitespace
                     vol_str = user_text.lower().replace("громкость", "", 1).strip()
-                    
+
                     # Try to parse the number
                     level = parse_volume_text(vol_str)
-                    
+
                     if level is not None:
                         if set_volume(level):
                             speak(f"Громкость установлена на {level}")
                         else:
                             speak("Не удалось установить громкость.")
                     else:
-                        speak("Я не понял число громкости. Скажите число от одного до десяти.")
-                    
+                        speak(
+                            "Я не понял число громкости. Скажите число от одного до десяти."
+                        )
+
                     continue
                 except Exception as e:
                     print(f"❌ Ошибка громкости: {e}")
                     speak("Не удалось изменить громкость.")
                     continue
-            
+
             # Step 3: Send to AI
             ai_response = ask_ai(user_text)
-            
+
             # Step 4: Clean response
             clean_text = clean_response(ai_response)
-            
+
             # Step 5: Speak response (with wake word interrupt support)
             completed = speak(clean_text, check_interrupt=check_wakeword_once)
-            
-            # If interrupted by wake word, go back to waiting for wake word
+
+            # Enable follow-up mode for next iteration
+            skip_wakeword = True
+
+            # If the wake word interrupted playback, follow-up mode stays on
+            # (skip_wakeword is already True); just log that the answer was cut short
             if not completed:
                 print("⏹️ Ответ прерван - слушаю следующий вопрос")
-                skip_wakeword = True
                 continue
-            
+
             print()
             print("-" * 30)
             print()
-            
-            # Step 6: Loop continues...
-            
+
+            # Step 6: Loop continues with skip_wakeword=True
+
         except KeyboardInterrupt:
             signal_handler(None, None)
         except Exception as e:
diff --git a/stt.py b/stt.py
index 8cde3e2..554a490 100644
--- a/stt.py
+++ b/stt.py
@@ -34,12 +34,13 @@ class SpeechRecognizer:
         )
         print("✅ Модель Vosk загружена")
     
-    def listen(self, timeout_seconds: float = 5.0) -> str:
+    def listen(self, timeout_seconds: float = 5.0, detection_timeout: float = None) -> str:
         """
         Listen to microphone and transcribe speech.
         
         Args:
             timeout_seconds: Maximum time to listen for speech
+            detection_timeout: Seconds to wait for speech to start. If None or 0, uses timeout_seconds.
             
         Returns:
             Transcribed text from speech
@@ -53,10 +54,13 @@ class SpeechRecognizer:
         self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE)
         
         frames_to_read = int(SAMPLE_RATE * timeout_seconds / 4096)
+        detection_frames = int(SAMPLE_RATE * detection_timeout / 4096) if detection_timeout else frames_to_read
+        
         silence_frames = 0
         max_silence_frames = 10  # About 2.5 seconds of silence
+        speech_started = False
         
-        for _ in range(frames_to_read):
+        for i in range(frames_to_read):
             data = self.stream.read(4096, exception_on_overflow=False)
             
             if self.recognizer.AcceptWaveform(data):
@@ -71,9 +75,14 @@ class SpeechRecognizer:
                 partial = json.loads(self.recognizer.PartialResult())
                 if partial.get("partial", ""):
                     silence_frames = 0
+                    speech_started = True
                 else:
                     silence_frames += 1
             
+            # Give up early if no speech has started within the detection window
+            if not speech_started and i > detection_frames:
+                break
+            
             # Stop if too much silence after speech
             if silence_frames > max_silence_frames:
                 break
@@ -85,7 +94,9 @@ class SpeechRecognizer:
         if text:
             print(f"📝 Распознано: {text}")
         else:
-            print("⚠️ Речь не распознана")
+            # Stay silent when a detection_timeout check ended without any speech starting
+            if not detection_timeout or speech_started:
+                print("⚠️ Речь не распознана")
         
         return text
     
@@ -109,9 +120,9 @@ def get_recognizer() -> SpeechRecognizer:
     return _recognizer
 
 
-def listen(timeout_seconds: float = 5.0) -> str:
+def listen(timeout_seconds: float = 5.0, detection_timeout: float = None) -> str:
     """Listen to microphone and return transcribed text."""
-    return get_recognizer().listen(timeout_seconds)
+    return get_recognizer().listen(timeout_seconds, detection_timeout)
 
 
 def cleanup():