после решения проблемы с падежами и добавления памяти

2026-01-07 15:43:36 +03:00
parent 1b4d46e387
commit ebaed3fbbe
6 changed files with 269 additions and 32 deletions
--- a/ai.py
+++ b/ai.py
@@ -2,6 +2,7 @@
 AI module for Perplexity API integration.
 Sends user queries and receives AI responses.
 """
+
 import requests
 from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL

@@ -12,45 +13,48 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
 Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
 Отвечай кратко и по существу, на русском языке.
 Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
-Пиши в разговорном стиле, как при живом общении."""
+Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов."""


-def ask_ai(user_message: str) -> str:
+def ask_ai(messages_history: list) -> str:
    """
-    Send a message to Perplexity AI and get a response.
+    Send a message history to Perplexity AI and get a response.

    Args:
-        user_message: User's question or command
+        messages_history: List of dictionaries with role and content
+                         e.g., [{"role": "user", "content": "Hi"}]

    Returns:
        AI response text
    """
-    if not user_message.strip():
+    if not messages_history:
        return "Извините, я не расслышал вашу команду."

-    print(f"🤖 Запрос к AI: {user_message}")
+    # Extract the last user message for logging
+    last_user_message = next(
+        (m["content"] for m in reversed(messages_history) if m["role"] == "user"),
+        "Unknown",
+    )
+    print(f"🤖 Запрос к AI: {last_user_message}")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
-        "Content-Type": "application/json"
+        "Content-Type": "application/json",
    }

+    # Prepend system prompt to the history
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + list(messages_history)
+
    payload = {
        "model": PERPLEXITY_MODEL,
-        "messages": [
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": user_message}
-        ],
+        "messages": messages,
        "max_tokens": 500,
-        "temperature": 0.7
+        "temperature": 1.0,
    }

    try:
        response = requests.post(
-            PERPLEXITY_API_URL,
-            headers=headers,
-            json=payload,
-            timeout=30
+            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()

--- a/cleaner.py
+++ b/cleaner.py
@@ -1,8 +1,196 @@
 """
 Response cleaner module.
 Removes markdown formatting and special characters from AI responses.
+Handles complex number-to-text conversion for Russian language.
 """
 import re
+import pymorphy3
+from num2words import num2words
+
+# Shared morphological analyzer, built once at import time and reused by numbers_to_words
+morph = pymorphy3.MorphAnalyzer()
+
+# Preposition -> pymorphy case tag it is assumed to govern (simplified heuristic: one case per preposition)
+PREPOSITION_CASES = {
+    'в': 'loct',  # prepositional (loct) chosen, as in 'в году'; 'в' can also govern accusative
+    'во': 'loct',
+    'на': 'accs', # accusative, as in dates: 'на 5 мая' -> 'на пятое'
+    'о': 'loct',
+    'об': 'loct',
+    'обо': 'loct',
+    'при': 'loct',
+    'у': 'gent',
+    'от': 'gent',
+    'до': 'gent',
+    'из': 'gent',
+    'с': 'gent',  # genitive chosen; 'с' can also govern instrumental (ablt)
+    'со': 'gent',
+    'без': 'gent',
+    'для': 'gent',
+    'вокруг': 'gent',
+    'после': 'gent',
+    'к': 'datv',
+    'ко': 'datv',
+    'по': 'datv', # dative by default; dates use accusative for a limit ('по 5 мая' -> 'по пятое')
+    'над': 'ablt',
+    'под': 'ablt',
+    'перед': 'ablt',
+    'за': 'ablt', # instrumental chosen; 'за' can also govern accusative (accs)
+    'между': 'ablt',
+}
+
+# pymorphy case tag -> case name passed to num2words as its `case` argument
+PYMORPHY_TO_NUM2WORDS = {
+    'nomn': 'nominative',
+    'gent': 'genitive',
+    'datv': 'dative',
+    'accs': 'accusative',
+    'ablt': 'instrumental',
+    'loct': 'prepositional',
+    'voct': 'nominative', # fallback: vocative is spelled as nominative
+    'gen2': 'genitive',  # the *2 tags below reuse the name of their primary case
+    'acc2': 'accusative',
+    'loc2': 'prepositional',
+}
+
+# Month names in the genitive case, as written after a day number ('25 июня'); joined into the date regex
+MONTHS_GENITIVE = [
+    'января', 'февраля', 'марта', 'апреля', 'мая', 'июня',
+    'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'
+]
+
+def get_case_from_preposition(prep_token):
+    """Look up the pymorphy case tag mapped to a preposition; None if it is empty or unknown."""
+    if prep_token:
+        return PREPOSITION_CASES.get(prep_token.lower())
+    return None
+
+def convert_number(number_str, context_type='cardinal', case='nominative', gender='m'):
+    """
+    Spell out a numeric string in Russian using num2words.
+
+    The string is parsed as a float when it contains '.' or ',' (a comma is
+    treated as the decimal separator) and as an int otherwise. If parsing or
+    conversion raises, the error is printed and number_str is returned as is.
+    """
+    try:
+        if any(sep in number_str for sep in ('.', ',')):
+            parsed = float(number_str.replace(',', '.'))
+        else:
+            parsed = int(number_str)
+
+        options = {'lang': 'ru', 'to': context_type, 'case': case, 'gender': gender}
+        return num2words(parsed, **options)
+    except Exception as e:
+        print(f"Error converting number {number_str}: {e}")
+        return number_str
+
+def numbers_to_words(text: str) -> str:
+    """
+    Convert digits in Russian text to words with basic case agreement.
+
+    Three regex passes run in order; a number spelled out by an earlier pass
+    normally contains no digits any more, so later passes leave it alone:
+      1. years ("в 2024 году") -> ordinal, case taken from the "год" word form
+      2. dates ("с 1 мая")     -> neuter ordinal, case taken from the preposition
+      3. everything else       -> cardinal, case taken from the preposition
+
+    Case selection is heuristic (see PREPOSITION_CASES), so ambiguous
+    prepositions may produce the wrong word form.
+
+    Args:
+        text: Text that may contain digit sequences.
+
+    Returns:
+        Text with the matched numbers spelled out; "" for empty or None input.
+    """
+    if not text:
+        return ""
+
+    # 1. Identify "Year" patterns: "1999 год", "в 2024 году"
+    def replace_year_match(match):
+        # group(1) is the optional preposition *including* its trailing
+        # whitespace, so it is re-emitted verbatim (no extra space appended).
+        prep = match.group(1) or ""
+        year_str = match.group(2)
+        year_word = match.group(3)  # год, году, года...
+
+        # The numeral takes the grammatical case of the "год" form, read
+        # from the first pymorphy parse of that word.
+        # NOTE(review): the preposition is not used to disambiguate forms
+        # such as "году" — confirm whether that is needed.
+        case_tag = morph.parse(year_word)[0].tag.case
+        nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative')
+
+        words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m')
+        return f"{prep}{words} {year_word}"
+
+    text = re.sub(
+        r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b',
+        replace_year_match,
+        text
+    )
+
+    # 2. Identify "Date" patterns: "25 июня", "с 1 мая"
+    # Matches: (Preposition)? (Day) (Month_Genitive)
+    # Day is usually 1-31.
+    month_regex = '|'.join(MONTHS_GENITIVE)
+
+    def replace_date_match(match):
+        prep = match.group(1) or ""  # keeps its own trailing whitespace
+        day_str = match.group(2)
+        month_word = match.group(3)
+
+        # Default to Genitive ("25 июня" -> "двадцать пятого июня")
+        case = 'genitive'
+
+        # Specific overrides for dates
+        prep_clean = prep.strip().lower()
+        if prep_clean in ('на', 'по'):
+            case = 'accusative'  # на 5 мая -> на пятое; по 5 мая -> по пятое (limit)
+        elif prep_clean == 'к':
+            case = 'dative'  # к 5 мая -> к пятому
+        elif prep_clean in ('с', 'до', 'от'):
+            case = 'genitive'  # с 5 мая -> с пятого
+        elif prep_clean:
+            # Fallback to general preposition map
+            morph_case = get_case_from_preposition(prep_clean)
+            if morph_case:
+                case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive')
+
+        # Dates agree with the implied neuter noun "число", hence gender='n'
+        # (пятое, пятого, пятому).
+        words = convert_number(day_str, context_type='ordinal', case=case, gender='n')
+        return f"{prep}{words} {month_word}"
+
+    text = re.sub(
+        r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b',
+        replace_date_match,
+        text
+    )
+
+    # 3. Handle remaining numbers (Cardinals)
+    def replace_cardinal_match(match):
+        prep = match.group(1) or ""  # keeps its own trailing whitespace
+        num_str = match.group(2)
+
+        # Without a governing preposition the numeral stays nominative
+        case = 'nominative'
+        if prep:
+            morph_case = get_case_from_preposition(prep.strip())
+            if morph_case:
+                case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative')
+
+        words = convert_number(num_str, context_type='cardinal', case=case)
+        return f"{prep}{words}"
+
+    text = re.sub(
+        r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b',
+        replace_cardinal_match,
+        text
+    )
+
+    return text


 def clean_response(text: str) -> str:
@@ -19,9 +207,10 @@ def clean_response(text: str) -> str:
        return ""
    
    # Remove citation references like [1], [2], [citation], etc.
-    text = re.sub(r'\[\d+\]', '', text)
-    text = re.sub(r'\[citation\s*needed\]', '', text, flags=re.IGNORECASE)
-    text = re.sub(r'\[source\]', '', text, flags=re.IGNORECASE)
+    # Using hex escapes for brackets to avoid escaping issues
+    text = re.sub(r'\x5B\d+\x5D', '', text)
+    text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE)
+    text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE)
    
    # Remove markdown bold **text** and __text__
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
@@ -38,10 +227,10 @@ def clean_response(text: str) -> str:
    text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
    
    # Remove markdown links [text](url) -> text
-    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
+    text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text)
    
    # Remove markdown images ![alt](url)
-    text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)
+    text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text)
    
    # Remove inline code `code`
    text = re.sub(r'`([^`]+)`', r'\1', text)
@@ -62,6 +251,9 @@ def clean_response(text: str) -> str:
    # Remove HTML tags if any
    text = re.sub(r'<[^>]+>', '', text)
    
+    # Convert numbers to words (Russian)
+    text = numbers_to_words(text)
+    
    # Remove extra whitespace
    text = re.sub(r'\n{3,}', '\n\n', text)
    text = re.sub(r'  +', ' ', text)
--- a/config.py
+++ b/config.py
@@ -14,7 +14,7 @@ BASE_DIR = Path(__file__).parent

 # Perplexity API configuration
 PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
-PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-online")
+PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-128k-chat")
 PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"

 # Porcupine configuration
--- a/main.py
+++ b/main.py
@@ -13,6 +13,7 @@ Flow:

 import signal
 import sys
+from collections import deque

 from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
 from stt import listen, cleanup as cleanup_stt, get_recognizer
@@ -49,6 +50,9 @@ def main():
    init_tts()  # Then initialize TTS model
    print()

+    # Initialize chat history (last 10 exchanges = 20 messages)
+    chat_history = deque(maxlen=20)
+
    # Main loop
    skip_wakeword = False
    while True:
@@ -76,6 +80,14 @@ def main():
                speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
                continue

+            # Check for stop commands
+            user_text_lower = user_text.lower().strip()
+            if user_text_lower in ["стоп", "александр", "стоп александр"]:
+                print("_" * 50)
+                print("💤 Жду 'Alexandr' для активации...")
+                skip_wakeword = False
+                continue
+
            # Check for volume command
            if user_text.lower().startswith("громкость"):
                try:
@@ -102,7 +114,14 @@ def main():
                    continue

            # Step 3: Send to AI
-            ai_response = ask_ai(user_text)
+            # Add user message to history
+            chat_history.append({"role": "user", "content": user_text})
+            
+            # Get response using history
+            ai_response = ask_ai(list(chat_history))
+            
+            # Add AI response to history
+            chat_history.append({"role": "assistant", "content": ai_response})

            # Step 4: Clean response
            clean_text = clean_response(ai_response)
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,3 +24,5 @@ omegaconf>=2.3.0

 # Utils
 numpy>=1.24.0
+num2words
+pymorphy3
--- a/test_cleaner.py
+++ b/test_cleaner.py
@@ -0,0 +1,20 @@
+
+import cleaner
+import traceback
+
+# Smoke test: run a few representative inputs through clean_response and
+# print each result; any exception is reported as a traceback, not raised.
+samples = (
+    "В 1999 году.",
+    "![image](http://example.com)",
+    "[link](http://example.com)",
+)
+
+try:
+    print("Testing cleaner...")
+    for sample in samples:
+        cleaned = cleaner.clean_response(sample)
+        print(f"Result: {cleaned}")
+
+except Exception:
+    traceback.print_exc()