diff --git a/ai.py b/ai.py index 34daea0..cfbbb70 100644 --- a/ai.py +++ b/ai.py @@ -2,6 +2,7 @@ AI module for Perplexity API integration. Sends user queries and receives AI responses. """ + import requests from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL @@ -12,53 +13,56 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а Твоя главная цель — помогать пользователю и поддерживать интересный диалог. Отвечай кратко и по существу, на русском языке. Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом. -Пиши в разговорном стиле, как при живом общении.""" +Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов.""" -def ask_ai(user_message: str) -> str: +def ask_ai(messages_history: list) -> str: """ - Send a message to Perplexity AI and get a response. - + Send a message history to Perplexity AI and get a response. + Args: - user_message: User's question or command - + messages_history: List of dictionaries with role and content + e.g., [{"role": "user", "content": "Hi"}] + Returns: AI response text """ - if not user_message.strip(): + if not messages_history: return "Извините, я не расслышал вашу команду." - - print(f"🤖 Запрос к AI: {user_message}") - + + # Extract the last user message for logging + last_user_message = next( + (m["content"] for m in reversed(messages_history) if m["role"] == "user"), + "Unknown", + ) + print(f"🤖 Запрос к AI: {last_user_message}") + headers = { "Authorization": f"Bearer {PERPLEXITY_API_KEY}", - "Content-Type": "application/json" + "Content-Type": "application/json", } - + + # Prepend system prompt to the history + messages = [{"role": "system", "content": SYSTEM_PROMPT}] + list(messages_history) + payload = { "model": PERPLEXITY_MODEL, - "messages": [ - {"role": "system", "content": SYSTEM_PROMPT}, - {"role": "user", "content": user_message} - ], + "messages": messages, "max_tokens": 500, - "temperature": 0.7 + "temperature": 1.0, } - + try: response = requests.post( - PERPLEXITY_API_URL, - headers=headers, - json=payload, - timeout=30 + PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30 ) response.raise_for_status() - + data = response.json() ai_response = data["choices"][0]["message"]["content"] print(f"💬 Ответ AI: {ai_response[:100]}...") return ai_response - + except requests.exceptions.Timeout: return "Извините, сервер не отвечает. Попробуйте позже." except requests.exceptions.RequestException as e: diff --git a/cleaner.py b/cleaner.py index 75eee26..1bbcb12 100644 --- a/cleaner.py +++ b/cleaner.py @@ -1,8 +1,196 @@ """ Response cleaner module. Removes markdown formatting and special characters from AI responses. +Handles complex number-to-text conversion for Russian language. """ import re +import pymorphy3 +from num2words import num2words + +# Initialize morphological analyzer +morph = pymorphy3.MorphAnalyzer() + +# Preposition to case mapping (simplified heuristics) +PREPOSITION_CASES = { + 'в': 'loct', # Prepositional (Locative 2) or Accusative. 'v godu' -> loct + 'во': 'loct', + 'на': 'accs', # Dates: 'na 5 maya' -> Accusative (na pyatoe) + 'о': 'loct', + 'об': 'loct', + 'обо': 'loct', + 'при': 'loct', + 'у': 'gent', + 'от': 'gent', + 'до': 'gent', + 'из': 'gent', + 'с': 'gent', # or ablt (instrumental) + 'со': 'gent', + 'без': 'gent', + 'для': 'gent', + 'вокруг': 'gent', + 'после': 'gent', + 'к': 'datv', + 'ко': 'datv', + 'по': 'datv', # or accs for dates (limit). Heuristic: datv defaults usually. + 'над': 'ablt', + 'под': 'ablt', + 'перед': 'ablt', + 'за': 'ablt', # or acc + 'между': 'ablt', +} + +# Mapping pymorphy cases to num2words cases +PYMORPHY_TO_NUM2WORDS = { + 'nomn': 'nominative', + 'gent': 'genitive', + 'datv': 'dative', + 'accs': 'accusative', + 'ablt': 'instrumental', + 'loct': 'prepositional', + 'voct': 'nominative', # Fallback + 'gen2': 'genitive', + 'acc2': 'accusative', + 'loc2': 'prepositional', +} + +# Month names in Genitive case (as they appear in dates) +MONTHS_GENITIVE = [ + 'января', 'февраля', 'марта', 'апреля', 'мая', 'июня', + 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря' +] + +def get_case_from_preposition(prep_token): + """Return pymorphy case based on preposition.""" + if not prep_token: + return None + return PREPOSITION_CASES.get(prep_token.lower()) + +def convert_number(number_str, context_type='cardinal', case='nominative', gender='m'): + """Convert a number string to words with specific parameters.""" + try: + # Handle floats + if '.' in number_str or ',' in number_str: + num_val = float(number_str.replace(',', '.')) + else: + num_val = int(number_str) + + return num2words( + num_val, + lang='ru', + to=context_type, + case=case, + gender=gender + ) + except Exception as e: + print(f"Error converting number {number_str}: {e}") + return number_str + +def numbers_to_words(text: str) -> str: + """ + Intelligent conversion of digits in text to Russian words. + Handles years, dates, and basic case agreement. + """ + if not text: + return "" + + # 1. Identify "Year" patterns: "1999 год", "в 2024 году" + def replace_year_match(match): + full_str = match.group(0) + prep = match.group(1) # Could be None + year_str = match.group(2) + year_word = match.group(3) # год, году, года... + + parsed = morph.parse(year_word)[0] + case_tag = parsed.tag.case + + if prep and prep.strip().lower() in ['в', 'во'] and case_tag in ['accs', 'nomn']: + pass + + nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative') + + words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m') + + prefix = f"{prep} " if prep else "" + return f"{prefix}{words} {year_word}" + + text = re.sub( + r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b', + replace_year_match, + text + ) + + # 2. Identify "Date" patterns: "25 июня", "с 1 мая" + # Matches: (Preposition)? (Day) (Month_Genitive) + # Day is usually 1-31. + month_regex = '|'.join(MONTHS_GENITIVE) + + def replace_date_match(match): + prep = match.group(1) + day_str = match.group(2) + month_word = match.group(3) + + # Determine case + # Default to Genitive ("25 июня" -> "двадцать пятого июня") + case = 'genitive' + + if prep: + prep_clean = prep.strip().lower() + # Specific overrides for dates + if prep_clean == 'на': + case = 'accusative' # на 5 мая -> на пятое + elif prep_clean == 'по': + case = 'accusative' # по 5 мая -> по пятое (limit) + elif prep_clean == 'к': + case = 'dative' # к 5 мая -> к пятому + elif prep_clean in ['с', 'до', 'от']: + case = 'genitive' # с 5 мая -> с пятого + else: + # Fallback to general preposition map + morph_case = get_case_from_preposition(prep_clean) + if morph_case: + case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive') + + # Convert to Ordinal + # Dates are neuter ("число" implies neuter: "пятое", "пятого") + # However, num2words for genitive ordinal: + # 5, ordinal, genitive -> "пятого" (masc/neut are same) + # 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?) + # Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому). + + words = convert_number(day_str, context_type='ordinal', case=case, gender='n') + + prefix = f"{prep} " if prep else "" + return f"{prefix}{words} {month_word}" + + text = re.sub( + r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b', + replace_date_match, + text + ) + + # 3. Handle remaining numbers (Cardinals) + def replace_cardinal_match(match): + prep = match.group(1) + num_str = match.group(2) + + case = 'nominative' + if prep: + morph_case = get_case_from_preposition(prep.strip()) + if morph_case: + case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative') + + words = convert_number(num_str, context_type='cardinal', case=case) + + prefix = f"{prep} " if prep else "" + return f"{prefix}{words}" + + text = re.sub( + r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b', + replace_cardinal_match, + text + ) + + return text def clean_response(text: str) -> str: @@ -19,9 +207,10 @@ def clean_response(text: str) -> str: return "" # Remove citation references like [1], [2], [citation], etc. - text = re.sub(r'\[\d+\]', '', text) - text = re.sub(r'\[citation\s*needed\]', '', text, flags=re.IGNORECASE) - text = re.sub(r'\[source\]', '', text, flags=re.IGNORECASE) + # Using hex escapes for brackets to avoid escaping issues + text = re.sub(r'\x5B\d+\x5D', '', text) + text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE) + text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE) # Remove markdown bold **text** and __text__ text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) @@ -38,10 +227,10 @@ def clean_response(text: str) -> str: text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE) # Remove markdown links [text](url) -> text - text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) + text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text) # Remove markdown images ![alt](url) - text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text) + text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text) # Remove inline code `code` text = re.sub(r'`([^`]+)`', r'\1', text) @@ -62,6 +251,9 @@ def clean_response(text: str) -> str: # Remove HTML tags if any text = re.sub(r'<[^>]+>', '', text) + # Convert numbers to words (Russian) + text = numbers_to_words(text) + # Remove extra whitespace text = re.sub(r'\n{3,}', '\n\n', text) text = re.sub(r' +', ' ', text) @@ -69,4 +261,4 @@ def clean_response(text: str) -> str: # Clean up and return text = text.strip() - return text + return text \ No newline at end of file diff --git a/config.py b/config.py index eea6f44..7847ec0 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ BASE_DIR = Path(__file__).parent # Perplexity API configuration PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") -PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-online") +PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-128k-chat") PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions" # Porcupine configuration diff --git a/main.py b/main.py index 149789e..0e20171 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,7 @@ Flow: import signal import sys +from collections import deque from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once from stt import listen, cleanup as cleanup_stt, get_recognizer @@ -49,6 +50,9 @@ def main(): init_tts() # Then initialize TTS model print() + # Initialize chat history (last 10 exchanges = 20 messages) + chat_history = deque(maxlen=20) + # Main loop skip_wakeword = False while True: @@ -76,6 +80,14 @@ def main(): speak("Извините, я вас не расслышал. Попробуйте ещё раз.") continue + # Check for stop commands + user_text_lower = user_text.lower().strip() + if user_text_lower in ["стоп", "александр", "стоп александр"]: + print("_" * 50) + print("💤 Жду 'Alexandr' для активации...") + skip_wakeword = False + continue + # Check for volume command if user_text.lower().startswith("громкость"): try: @@ -102,7 +114,14 @@ def main(): continue # Step 3: Send to AI - ai_response = ask_ai(user_text) + # Add user message to history + chat_history.append({"role": "user", "content": user_text}) + + # Get response using history + ai_response = ask_ai(list(chat_history)) + + # Add AI response to history + chat_history.append({"role": "assistant", "content": ai_response}) # Step 4: Clean response clean_text = clean_response(ai_response) diff --git a/requirements.txt b/requirements.txt index 06a0404..18ed72d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,5 @@ omegaconf>=2.3.0 # Utils numpy>=1.24.0 +num2words +pymorphy3 diff --git a/test_cleaner.py b/test_cleaner.py new file mode 100644 index 0000000..ee2869b --- /dev/null +++ b/test_cleaner.py @@ -0,0 +1,20 @@ + +import cleaner +import traceback + +try: + print("Testing cleaner...") + text = "В 1999 году." + res = cleaner.clean_response(text) + print(f"Result: {res}") + + text = "![image](http://example.com)" + res = cleaner.clean_response(text) + print(f"Result: {res}") + + text = "[link](http://example.com)" + res = cleaner.clean_response(text) + print(f"Result: {res}") + +except Exception: + traceback.print_exc()