после решения проблемы с падежами и добавления памяти
This commit is contained in:
52
ai.py
52
ai.py
@@ -2,6 +2,7 @@
|
|||||||
AI module for Perplexity API integration.
|
AI module for Perplexity API integration.
|
||||||
Sends user queries and receives AI responses.
|
Sends user queries and receives AI responses.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL
|
from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL
|
||||||
|
|
||||||
@@ -12,53 +13,56 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
|
|||||||
Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
|
Твоя главная цель — помогать пользователю и поддерживать интересный диалог.
|
||||||
Отвечай кратко и по существу, на русском языке.
|
Отвечай кратко и по существу, на русском языке.
|
||||||
Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
|
Избегай длинных списков, сложного форматирования и спецсимволов, так как твои ответы озвучиваются голосом.
|
||||||
Пиши в разговорном стиле, как при живом общении."""
|
Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов."""
|
||||||
|
|
||||||
|
|
||||||
def ask_ai(user_message: str) -> str:
|
def ask_ai(messages_history: list) -> str:
|
||||||
"""
|
"""
|
||||||
Send a message to Perplexity AI and get a response.
|
Send a message history to Perplexity AI and get a response.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
user_message: User's question or command
|
messages_history: List of dictionaries with role and content
|
||||||
|
e.g., [{"role": "user", "content": "Hi"}]
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
AI response text
|
AI response text
|
||||||
"""
|
"""
|
||||||
if not user_message.strip():
|
if not messages_history:
|
||||||
return "Извините, я не расслышал вашу команду."
|
return "Извините, я не расслышал вашу команду."
|
||||||
|
|
||||||
print(f"🤖 Запрос к AI: {user_message}")
|
# Extract the last user message for logging
|
||||||
|
last_user_message = next(
|
||||||
|
(m["content"] for m in reversed(messages_history) if m["role"] == "user"),
|
||||||
|
"Unknown",
|
||||||
|
)
|
||||||
|
print(f"🤖 Запрос к AI: {last_user_message}")
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
"Authorization": f"Bearer {PERPLEXITY_API_KEY}",
|
||||||
"Content-Type": "application/json"
|
"Content-Type": "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Prepend system prompt to the history
|
||||||
|
messages = [{"role": "system", "content": SYSTEM_PROMPT}] + list(messages_history)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": PERPLEXITY_MODEL,
|
"model": PERPLEXITY_MODEL,
|
||||||
"messages": [
|
"messages": messages,
|
||||||
{"role": "system", "content": SYSTEM_PROMPT},
|
|
||||||
{"role": "user", "content": user_message}
|
|
||||||
],
|
|
||||||
"max_tokens": 500,
|
"max_tokens": 500,
|
||||||
"temperature": 0.7
|
"temperature": 1.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
PERPLEXITY_API_URL,
|
PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
|
||||||
headers=headers,
|
|
||||||
json=payload,
|
|
||||||
timeout=30
|
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
ai_response = data["choices"][0]["message"]["content"]
|
ai_response = data["choices"][0]["message"]["content"]
|
||||||
print(f"💬 Ответ AI: {ai_response[:100]}...")
|
print(f"💬 Ответ AI: {ai_response[:100]}...")
|
||||||
return ai_response
|
return ai_response
|
||||||
|
|
||||||
except requests.exceptions.Timeout:
|
except requests.exceptions.Timeout:
|
||||||
return "Извините, сервер не отвечает. Попробуйте позже."
|
return "Извините, сервер не отвечает. Попробуйте позже."
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
|
|||||||
204
cleaner.py
204
cleaner.py
@@ -1,8 +1,196 @@
|
|||||||
"""
|
"""
|
||||||
Response cleaner module.
|
Response cleaner module.
|
||||||
Removes markdown formatting and special characters from AI responses.
|
Removes markdown formatting and special characters from AI responses.
|
||||||
|
Handles complex number-to-text conversion for Russian language.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
import pymorphy3
|
||||||
|
from num2words import num2words
|
||||||
|
|
||||||
|
# Initialize morphological analyzer
|
||||||
|
morph = pymorphy3.MorphAnalyzer()
|
||||||
|
|
||||||
|
# Grammatical case (pymorphy tag) assumed after each preposition. These are
# simplified heuristics: several prepositions can govern more than one case.
PREPOSITION_CASES = {
    preposition: case_tag
    for case_tag, prepositions in (
        # "в"/"во" can also take the accusative; "в ... году" is locative.
        ('loct', ('в', 'во')),
        # Dates: "на 5 мая" -> accusative ("на пятое").
        ('accs', ('на',)),
        ('loct', ('о', 'об', 'обо', 'при')),
        # "с"/"со" can also take the instrumental (ablt).
        ('gent', ('у', 'от', 'до', 'из', 'с', 'со', 'без', 'для', 'вокруг', 'после')),
        # "по" takes the accusative for date limits; dative is the usual default.
        ('datv', ('к', 'ко', 'по')),
        # "за" can also take the accusative.
        ('ablt', ('над', 'под', 'перед', 'за', 'между')),
    )
    for preposition in prepositions
}

# Translation from pymorphy case tags to the case names passed to num2words.
PYMORPHY_TO_NUM2WORDS = dict(
    nomn='nominative',
    gent='genitive',
    datv='dative',
    accs='accusative',
    ablt='instrumental',
    loct='prepositional',
    voct='nominative',  # no vocative counterpart: fall back to nominative
    gen2='genitive',
    acc2='accusative',
    loc2='prepositional',
)

# Month names in the genitive case, the form they take inside dates.
MONTHS_GENITIVE = (
    'января февраля марта апреля мая июня '
    'июля августа сентября октября ноября декабря'
).split()
|
||||||
|
|
||||||
|
def get_case_from_preposition(prep_token):
    """Return the pymorphy case tag associated with a preposition.

    Args:
        prep_token: Preposition text, or None/empty when there is none.
            Surrounding whitespace and letter case are ignored, so a token
            captured together with its trailing space ("в ") is accepted.

    Returns:
        The case tag from PREPOSITION_CASES (e.g. 'gent'), or None when the
        token is empty or the preposition is not in the table.
    """
    if not prep_token:
        return None
    return PREPOSITION_CASES.get(prep_token.strip().lower())
|
||||||
|
|
||||||
|
def convert_number(number_str, context_type='cardinal', case='nominative', gender='m'):
    """Spell out a numeric string in Russian words via num2words.

    Args:
        number_str: Digits, optionally with a '.' or ',' decimal separator.
        context_type: Forwarded to num2words as ``to`` (e.g. 'cardinal', 'ordinal').
        case: Grammatical case name forwarded to num2words.
        gender: Grammatical gender forwarded to num2words.

    Returns:
        The spelled-out number, or ``number_str`` unchanged if parsing or
        conversion fails for any reason (the error is printed).
    """
    try:
        # A dot or comma marks a decimal fraction; anything else must parse as int.
        is_fractional = any(mark in number_str for mark in ('.', ','))
        parse = float if is_fractional else int
        value = parse(number_str.replace(',', '.'))

        options = {'lang': 'ru', 'to': context_type, 'case': case, 'gender': gender}
        return num2words(value, **options)
    except Exception as e:
        # Best effort: keep the original digits rather than break the response.
        print(f"Error converting number {number_str}: {e}")
        return number_str
|
||||||
|
|
||||||
|
# "<preposition>? <3-4 digit year> <form of the noun год>". Only real forms of
# "год" are listed so that look-alikes ("годовых", "годный") are not read as years.
_YEAR_PATTERN = re.compile(
    r'\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год(?:ами|ам|ах|ов|ом|а|у|е|ы)?)\b',
    re.IGNORECASE,
)

# num2words case of the day ordinal after a preposition; a bare date is genitive.
_DATE_PREPOSITION_CASES = {
    'на': 'accusative',  # на 5 мая -> на пятое
    'по': 'accusative',  # по 5 мая -> по пятое (limit)
    'к': 'dative',       # к 5 мая -> к пятому
    'ко': 'dative',      # ко 2 мая -> ко второму
    'с': 'genitive',     # с 5 мая -> с пятого
    'со': 'genitive',    # со 2 мая -> со второго
    'до': 'genitive',
    'от': 'genitive',
}


def _preposition_group(prepositions):
    """Build an optional regex group matching one of *prepositions* plus trailing whitespace."""
    # Longest first so that e.g. "обо" is tried before "об" and "о".
    ordered = sorted(prepositions, key=len, reverse=True)
    return r'((?:' + '|'.join(re.escape(p) for p in ordered) + r')\s+)?'


def _replace_year(match):
    """Spell a year as a masculine ordinal agreeing in case with the following form of "год"."""
    prep = match.group(1) or ''
    year_str, year_word = match.group(2), match.group(3)

    # The case comes from the first morphological parse of the noun itself.
    case_tag = morph.parse(year_word)[0].tag.case
    nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative')

    words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m')
    # ``prep`` already carries its own trailing whitespace.
    return f"{prep}{words} {year_word}"


def _replace_date(match):
    """Spell the day of a "<day> <month>" date as a neuter ordinal in the case the preposition requires."""
    prep = match.group(1) or ''
    day_str, month_word = match.group(2), match.group(3)

    case = _DATE_PREPOSITION_CASES.get(prep.strip().lower(), 'genitive')
    # Neuter, because the implied noun is "число" (пятое, пятого, пятому).
    words = convert_number(day_str, context_type='ordinal', case=case, gender='n')
    return f"{prep}{words} {month_word}"


def _replace_cardinal(match):
    """Spell a remaining number as a cardinal in the case suggested by a preceding preposition."""
    prep = match.group(1) or ''
    num_str = match.group(2)

    morph_case = get_case_from_preposition(prep.strip())
    case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative')

    words = convert_number(num_str, context_type='cardinal', case=case)
    return f"{prep}{words}"


def numbers_to_words(text: str) -> str:
    """Replace digits in Russian text with words, with basic case agreement.

    Three passes run in order, each seeing the output of the previous one:

    1. Years ("в 2024 году"): ordinal, case taken from the form of "год".
    2. Dates ("с 1 мая"): ordinal day, case taken from the preposition
       (genitive when there is none).
    3. Every other number: cardinal, case taken from a preceding preposition
       listed in PREPOSITION_CASES (nominative otherwise).

    Whitespace between a preposition and the number is preserved as written.

    Args:
        text: Text that may contain digits. Falsy input yields "".

    Returns:
        The text with numbers spelled out. A number that cannot be converted
        is left as digits (see convert_number).
    """
    if not text:
        return ""

    text = _YEAR_PATTERN.sub(_replace_year, text)

    # Day is 1-2 digits followed by a month name in the genitive.
    date_pattern = re.compile(
        r'\b' + _preposition_group(_DATE_PREPOSITION_CASES)
        + r'(\d{1,2})\s+(' + '|'.join(MONTHS_GENITIVE) + r')\b',
        re.IGNORECASE,
    )
    text = date_pattern.sub(_replace_date, text)

    # Prepositions come from the shared table so the regex cannot drift from it.
    cardinal_pattern = re.compile(
        r'\b' + _preposition_group(PREPOSITION_CASES) + r'(\d+(?:[.,]\d+)?)\b',
        re.IGNORECASE,
    )
    return cardinal_pattern.sub(_replace_cardinal, text)
|
||||||
|
|
||||||
|
|
||||||
def clean_response(text: str) -> str:
|
def clean_response(text: str) -> str:
|
||||||
@@ -19,9 +207,10 @@ def clean_response(text: str) -> str:
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Remove citation references like [1], [2], [citation], etc.
|
# Remove citation references like [1], [2], [citation], etc.
|
||||||
text = re.sub(r'\[\d+\]', '', text)
|
# Using hex escapes for brackets to avoid escaping issues
|
||||||
text = re.sub(r'\[citation\s*needed\]', '', text, flags=re.IGNORECASE)
|
text = re.sub(r'\x5B\d+\x5D', '', text)
|
||||||
text = re.sub(r'\[source\]', '', text, flags=re.IGNORECASE)
|
text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE)
|
||||||
|
text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE)
|
||||||
|
|
||||||
# Remove markdown bold **text** and __text__
|
# Remove markdown bold **text** and __text__
|
||||||
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
||||||
@@ -38,10 +227,10 @@ def clean_response(text: str) -> str:
|
|||||||
text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
|
text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
|
||||||
|
|
||||||
# Remove markdown links [text](url) -> text
|
# Remove markdown links [text](url) -> text
|
||||||
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
|
text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text)
|
||||||
|
|
||||||
# Remove markdown images 
|
# Remove markdown images 
|
||||||
text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)
|
text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text)
|
||||||
|
|
||||||
# Remove inline code `code`
|
# Remove inline code `code`
|
||||||
text = re.sub(r'`([^`]+)`', r'\1', text)
|
text = re.sub(r'`([^`]+)`', r'\1', text)
|
||||||
@@ -62,6 +251,9 @@ def clean_response(text: str) -> str:
|
|||||||
# Remove HTML tags if any
|
# Remove HTML tags if any
|
||||||
text = re.sub(r'<[^>]+>', '', text)
|
text = re.sub(r'<[^>]+>', '', text)
|
||||||
|
|
||||||
|
# Convert numbers to words (Russian)
|
||||||
|
text = numbers_to_words(text)
|
||||||
|
|
||||||
# Remove extra whitespace
|
# Remove extra whitespace
|
||||||
text = re.sub(r'\n{3,}', '\n\n', text)
|
text = re.sub(r'\n{3,}', '\n\n', text)
|
||||||
text = re.sub(r' +', ' ', text)
|
text = re.sub(r' +', ' ', text)
|
||||||
@@ -69,4 +261,4 @@ def clean_response(text: str) -> str:
|
|||||||
# Clean up and return
|
# Clean up and return
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
|
|
||||||
return text
|
return text
|
||||||
@@ -14,7 +14,7 @@ BASE_DIR = Path(__file__).parent
|
|||||||
|
|
||||||
# Perplexity API configuration
|
# Perplexity API configuration
|
||||||
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
||||||
PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-online")
|
PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "llama-3.1-sonar-small-128k-chat")
|
||||||
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
||||||
|
|
||||||
# Porcupine configuration
|
# Porcupine configuration
|
||||||
|
|||||||
21
main.py
21
main.py
@@ -13,6 +13,7 @@ Flow:
|
|||||||
|
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
from collections import deque
|
||||||
|
|
||||||
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
||||||
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
||||||
@@ -49,6 +50,9 @@ def main():
|
|||||||
init_tts() # Then initialize TTS model
|
init_tts() # Then initialize TTS model
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
# Initialize chat history (last 10 exchanges = 20 messages)
|
||||||
|
chat_history = deque(maxlen=20)
|
||||||
|
|
||||||
# Main loop
|
# Main loop
|
||||||
skip_wakeword = False
|
skip_wakeword = False
|
||||||
while True:
|
while True:
|
||||||
@@ -76,6 +80,14 @@ def main():
|
|||||||
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Check for stop commands
|
||||||
|
user_text_lower = user_text.lower().strip()
|
||||||
|
if user_text_lower in ["стоп", "александр", "стоп александр"]:
|
||||||
|
print("_" * 50)
|
||||||
|
print("💤 Жду 'Alexandr' для активации...")
|
||||||
|
skip_wakeword = False
|
||||||
|
continue
|
||||||
|
|
||||||
# Check for volume command
|
# Check for volume command
|
||||||
if user_text.lower().startswith("громкость"):
|
if user_text.lower().startswith("громкость"):
|
||||||
try:
|
try:
|
||||||
@@ -102,7 +114,14 @@ def main():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Step 3: Send to AI
|
# Step 3: Send to AI
|
||||||
ai_response = ask_ai(user_text)
|
# Add user message to history
|
||||||
|
chat_history.append({"role": "user", "content": user_text})
|
||||||
|
|
||||||
|
# Get response using history
|
||||||
|
ai_response = ask_ai(list(chat_history))
|
||||||
|
|
||||||
|
# Add AI response to history
|
||||||
|
chat_history.append({"role": "assistant", "content": ai_response})
|
||||||
|
|
||||||
# Step 4: Clean response
|
# Step 4: Clean response
|
||||||
clean_text = clean_response(ai_response)
|
clean_text = clean_response(ai_response)
|
||||||
|
|||||||
@@ -24,3 +24,5 @@ omegaconf>=2.3.0
|
|||||||
|
|
||||||
# Utils
|
# Utils
|
||||||
numpy>=1.24.0
|
numpy>=1.24.0
|
||||||
|
num2words
|
||||||
|
pymorphy3
|
||||||
|
|||||||
20
test_cleaner.py
Normal file
20
test_cleaner.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
import cleaner
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
try:
    print("Testing cleaner...")
    # Smoke inputs: a year phrase, an empty string, and a markdown link.
    for sample in ("В 1999 году.", "", "[link](http://example.com)"):
        print(f"Result: {cleaner.clean_response(sample)}")
except Exception:
    # Any failure aborts the remaining samples and is reported, not raised.
    traceback.print_exc()
|
||||||
Reference in New Issue
Block a user