после решения проблемы с падежами и добавления памяти

This commit is contained in:
2026-01-07 15:43:36 +03:00
parent 1b4d46e387
commit ebaed3fbbe
6 changed files with 269 additions and 32 deletions

36
ai.py
View File

@@ -2,6 +2,7 @@
AI module for Perplexity API integration. AI module for Perplexity API integration.
Sends user queries and receives AI responses. Sends user queries and receives AI responses.
""" """
import requests import requests
from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL
@@ -12,45 +13,48 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
Твоя главная цель — помогать пользователю и поддерживать интересный диалог. Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
Отвечай кратко и по существу, на русском языке. Отвечай кратко и по существу, на русском языке.
Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом. Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
Пиши в разговорном стиле, как при живом общении.""" Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов."""
def ask_ai(user_message: str) -> str: def ask_ai(messages_history: list) -> str:
""" """
Send a message to Perplexity AI and get a response. Send a message history to Perplexity AI and get a response.
Args: Args:
user_message: User's question or command messages_history: List of dictionaries with role and content
e.g., [{"role": "user", "content": "Hi"}]
Returns: Returns:
AI response text AI response text
""" """
if not user_message.strip(): if not messages_history:
return "Извините, я не расслышал вашу команду." return "Извините, я не расслышал вашу команду."
print(f"🤖 Запрос к AI: {user_message}") # Extract the last user message for logging
last_user_message = next(
(m["content"] for m in reversed(messages_history) if m["role"] == "user"),
"Unknown",
)
print(f"🤖 Запрос к AI: {last_user_message}")
headers = { headers = {
"Authorization": f"Bearer {PERPLEXITY_API_KEY}", "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
"Content-Type": "application/json" "Content-Type": "application/json",
} }
# Prepend system prompt to the history
messages = [{"role": "system", "content": SYSTEM_PROMPT}] + list(messages_history)
payload = { payload = {
"model": PERPLEXITY_MODEL, "model": PERPLEXITY_MODEL,
"messages": [ "messages": messages,
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_message}
],
"max_tokens": 500, "max_tokens": 500,
"temperature": 0.7 "temperature": 1.0,
} }
try: try:
response = requests.post( response = requests.post(
PERPLEXITY_API_URL, PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
headers=headers,
json=payload,
timeout=30
) )
response.raise_for_status() response.raise_for_status()

View File

@@ -1,8 +1,196 @@
""" """
Response cleaner module. Response cleaner module.
Removes markdown formatting and special characters from AI responses. Removes markdown formatting and special characters from AI responses.
Handles complex number-to-text conversion for Russian language.
""" """
import re import re
import pymorphy3
from num2words import num2words
# Module-wide pymorphy3 analyzer, created once at import time and reused by
# numbers_to_words() to read the grammatical case of the word "год".
morph = pymorphy3.MorphAnalyzer()
# Russian preposition -> pymorphy case tag it most often governs.
# This is a simplified heuristic: several prepositions can take more than one
# case, and only the default reading is recorded here.
PREPOSITION_CASES = {
    # Prepositional (locative); "в" can also take accusative ("v godu" -> loct).
    'в': 'loct',
    'во': 'loct',
    # Accusative, as used in dates: "na 5 maya" -> "na pyatoe".
    'на': 'accs',
    # Prepositional.
    'о': 'loct',
    'об': 'loct',
    'обо': 'loct',
    'при': 'loct',
    # Genitive ("с"/"со" may also take instrumental).
    'у': 'gent',
    'от': 'gent',
    'до': 'gent',
    'из': 'gent',
    'с': 'gent',
    'со': 'gent',
    'без': 'gent',
    'для': 'gent',
    'вокруг': 'gent',
    'после': 'gent',
    # Dative ("по" takes accusative for date limits; dative is the default).
    'к': 'datv',
    'ко': 'datv',
    'по': 'datv',
    # Instrumental ("за" may also take accusative).
    'над': 'ablt',
    'под': 'ablt',
    'перед': 'ablt',
    'за': 'ablt',
    'между': 'ablt',
}
# Translation table from pymorphy case tags to the case names passed to
# num2words via its ``case`` keyword.
PYMORPHY_TO_NUM2WORDS = {
    # The six primary cases.
    'nomn': 'nominative',
    'gent': 'genitive',
    'datv': 'dative',
    'accs': 'accusative',
    'ablt': 'instrumental',
    'loct': 'prepositional',
    # Vocative is spoken as nominative (fallback).
    'voct': 'nominative',
    # Secondary case tags are folded into their primary counterpart.
    'gen2': 'genitive',
    'acc2': 'accusative',
    'loc2': 'prepositional',
}
# Month names in the genitive case, January through December: the form a
# month takes when it follows a day number in a date.
MONTHS_GENITIVE = [
    'января',
    'февраля',
    'марта',
    'апреля',
    'мая',
    'июня',
    'июля',
    'августа',
    'сентября',
    'октября',
    'ноября',
    'декабря',
]
def get_case_from_preposition(prep_token):
    """Return the pymorphy case tag governed by a preposition.

    The token is normalised (surrounding whitespace removed, lower-cased)
    before the lookup, so a regex capture such as ``"В "`` resolves the same
    way as ``"в"``.

    Args:
        prep_token: Preposition text, or None / empty when there is none.

    Returns:
        The case tag stored in PREPOSITION_CASES, or None when the token is
        missing, blank, or not in the table.
    """
    token = prep_token.strip().lower() if prep_token else ""
    if not token:
        return None
    return PREPOSITION_CASES.get(token)
def convert_number(
    number_str, context_type='cardinal', case='nominative', gender='m'
):
    """Spell out a numeric string in Russian via num2words.

    Args:
        number_str: Digits as text; a ``.`` or ``,`` marks a fractional value.
        context_type: Forwarded to num2words as ``to`` (e.g. 'cardinal',
            'ordinal').
        case: Forwarded to num2words as ``case``.
        gender: Forwarded to num2words as ``gender``.

    Returns:
        The spelled-out number, or ``number_str`` unchanged when parsing or
        conversion raises; the error is printed in that case.
    """
    try:
        is_fractional = any(sep in number_str for sep in '.,')
        if is_fractional:
            # A decimal comma is accepted as well as a decimal point.
            value = float(number_str.replace(',', '.'))
        else:
            value = int(number_str)
        spoken = num2words(
            value, lang='ru', to=context_type, case=case, gender=gender
        )
    except Exception as e:
        # Best effort: leave the digits in place rather than fail the caller.
        print(f"Error converting number {number_str}: {e}")
        return number_str
    return spoken
def numbers_to_words(text: str) -> str:
"""
Intelligent conversion of digits in text to Russian words.
Handles years, dates, and basic case agreement.
"""
if not text:
return ""
# 1. Identify "Year" patterns: "1999 год", "в 2024 году"
def replace_year_match(match):
full_str = match.group(0)
prep = match.group(1) # Could be None
year_str = match.group(2)
year_word = match.group(3) # год, году, года...
parsed = morph.parse(year_word)[0]
case_tag = parsed.tag.case
if prep and prep.strip().lower() in ['в', 'во'] and case_tag in ['accs', 'nomn']:
pass
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative')
words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m')
prefix = f"{prep} " if prep else ""
return f"{prefix}{words} {year_word}"
text = re.sub(
r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b',
replace_year_match,
text
)
# 2. Identify "Date" patterns: "25 июня", "с 1 мая"
# Matches: (Preposition)? (Day) (Month_Genitive)
# Day is usually 1-31.
month_regex = '|'.join(MONTHS_GENITIVE)
def replace_date_match(match):
prep = match.group(1)
day_str = match.group(2)
month_word = match.group(3)
# Determine case
# Default to Genitive ("25 июня" -> "двадцать пятого июня")
case = 'genitive'
if prep:
prep_clean = prep.strip().lower()
# Specific overrides for dates
if prep_clean == 'на':
case = 'accusative' # на 5 мая -> на пятое
elif prep_clean == 'по':
case = 'accusative' # по 5 мая -> по пятое (limit)
elif prep_clean == 'к':
case = 'dative' # к 5 мая -> к пятому
elif prep_clean in ['с', 'до', 'от']:
case = 'genitive' # с 5 мая -> с пятого
else:
# Fallback to general preposition map
morph_case = get_case_from_preposition(prep_clean)
if morph_case:
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive')
# Convert to Ordinal
# Dates are neuter ("число" implies neuter: "пятое", "пятого")
# However, num2words for genitive ordinal:
# 5, ordinal, genitive -> "пятого" (masc/neut are same)
# 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?)
# Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому).
words = convert_number(day_str, context_type='ordinal', case=case, gender='n')
prefix = f"{prep} " if prep else ""
return f"{prefix}{words} {month_word}"
text = re.sub(
r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b',
replace_date_match,
text
)
# 3. Handle remaining numbers (Cardinals)
def replace_cardinal_match(match):
prep = match.group(1)
num_str = match.group(2)
case = 'nominative'
if prep:
morph_case = get_case_from_preposition(prep.strip())
if morph_case:
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative')
words = convert_number(num_str, context_type='cardinal', case=case)
prefix = f"{prep} " if prep else ""
return f"{prefix}{words}"
text = re.sub(
r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b',
replace_cardinal_match,
text
)
return text
def clean_response(text: str) -> str: def clean_response(text: str) -> str:
@@ -19,9 +207,10 @@ def clean_response(text: str) -> str:
return "" return ""
# Remove citation references like [1], [2], [citation], etc. # Remove citation references like [1], [2], [citation], etc.
text = re.sub(r'\[\d+\]', '', text) # Using hex escapes for brackets to avoid escaping issues
text = re.sub(r'\[citation\s*needed\]', '', text, flags=re.IGNORECASE) text = re.sub(r'\x5B\d+\x5D', '', text)
text = re.sub(r'\[source\]', '', text, flags=re.IGNORECASE) text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE)
text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE)
# Remove markdown bold **text** and __text__ # Remove markdown bold **text** and __text__
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
@@ -38,10 +227,10 @@ def clean_response(text: str) -> str:
text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE) text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
# Remove markdown links [text](url) -> text # Remove markdown links [text](url) -> text
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text)
# Remove markdown images ![alt](url) # Remove markdown images ![alt](url)
text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text) text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text)
# Remove inline code `code` # Remove inline code `code`
text = re.sub(r'`([^`]+)`', r'\1', text) text = re.sub(r'`([^`]+)`', r'\1', text)
@@ -62,6 +251,9 @@ def clean_response(text: str) -> str:
# Remove HTML tags if any # Remove HTML tags if any
text = re.sub(r'<[^>]+>', '', text) text = re.sub(r'<[^>]+>', '', text)
# Convert numbers to words (Russian)
text = numbers_to_words(text)
# Remove extra whitespace # Remove extra whitespace
text = re.sub(r'\n{3,}', '\n\n', text) text = re.sub(r'\n{3,}', '\n\n', text)
text = re.sub(r' +', ' ', text) text = re.sub(r' +', ' ', text)

View File

@@ -14,7 +14,7 @@ BASE_DIR = Path(__file__).parent
# Perplexity API configuration # Perplexity API configuration
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-online") PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-128k-chat")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions" PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
# Porcupine configuration # Porcupine configuration

21
main.py
View File

@@ -13,6 +13,7 @@ Flow:
import signal import signal
import sys import sys
from collections import deque
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
from stt import listen, cleanup as cleanup_stt, get_recognizer from stt import listen, cleanup as cleanup_stt, get_recognizer
@@ -49,6 +50,9 @@ def main():
init_tts() # Then initialize TTS model init_tts() # Then initialize TTS model
print() print()
# Initialize chat history (last 10 exchanges = 20 messages)
chat_history = deque(maxlen=20)
# Main loop # Main loop
skip_wakeword = False skip_wakeword = False
while True: while True:
@@ -76,6 +80,14 @@ def main():
speak("Извините, я вас не расслышал. Попробуйте ещё раз.") speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
continue continue
# Check for stop commands
user_text_lower = user_text.lower().strip()
if user_text_lower in ["стоп", "александр", "стоп александр"]:
print("_" * 50)
print("💤 Жду 'Alexandr' для активации...")
skip_wakeword = False
continue
# Check for volume command # Check for volume command
if user_text.lower().startswith("громкость"): if user_text.lower().startswith("громкость"):
try: try:
@@ -102,7 +114,14 @@ def main():
continue continue
# Step 3: Send to AI # Step 3: Send to AI
ai_response = ask_ai(user_text) # Add user message to history
chat_history.append({"role": "user", "content": user_text})
# Get response using history
ai_response = ask_ai(list(chat_history))
# Add AI response to history
chat_history.append({"role": "assistant", "content": ai_response})
# Step 4: Clean response # Step 4: Clean response
clean_text = clean_response(ai_response) clean_text = clean_response(ai_response)

View File

@@ -24,3 +24,5 @@ omegaconf>=2.3.0
# Utils # Utils
numpy>=1.24.0 numpy>=1.24.0
num2words
pymorphy3

20
test_cleaner.py Normal file
View File

@@ -0,0 +1,20 @@
import cleaner
import traceback
# Manual smoke check: run a few representative strings through
# cleaner.clean_response and print what comes back. Any exception is reported
# with a traceback instead of aborting the script.
try:
    print("Testing cleaner...")
    for text in (
        "В 1999 году.",                  # year with a preposition
        "![image](http://example.com)",  # markdown image
        "[link](http://example.com)",    # markdown link
    ):
        res = cleaner.clean_response(text)
        print(f"Result: {res}")
except Exception:
    traceback.print_exc()