Update assistant features and docs

This commit is contained in:
2026-02-12 14:12:37 +03:00
parent bb3133a1c0
commit ca8ebd6657
19 changed files with 814 additions and 180 deletions

View File

@@ -12,6 +12,7 @@ Handles complex number-to-text conversion for Russian language.
import re
import pymorphy3
from num2words import num2words
from .roman import roman_to_int
# Initialize the morphological analyzer (used to determine grammatical cases)
morph = pymorphy3.MorphAnalyzer()
@@ -334,6 +335,50 @@ def numbers_to_words(text: str) -> str:
return text
def roman_numerals_to_words(text: str) -> str:
    """
    Replace Roman numerals that follow a Cyrillic word with ordinal words.

    The ordinal is inflected to agree with the preceding word: the case and
    gender are read from the first ``morph.parse`` result for that word and
    mapped through ``PYMORPHY_TO_NUM2WORDS`` / ``PYMORPHY_TO_GENDER``. When
    the analysis fails or carries no case/gender, nominative masculine is
    used.

    Example: "Ивана III" -> "Ивана третьего".

    Args:
        text: Input text; may be empty.

    Returns:
        The text with the numeral of every "<Cyrillic word> <Roman numeral>"
        pair replaced by an ordinal. The word and the whitespace separating
        it from the numeral are kept exactly as written. Falsy input yields
        an empty string.
    """
    if not text:
        return ""

    def replace_roman_match(match):
        prev_word = match.group(1)
        number = roman_to_int(match.group(2))
        if number is None:
            # roman_to_int produced no value (presumably the letters do not
            # form a valid numeral): leave the matched text untouched.
            return match.group(0)

        # Defaults used when the preceding word gives no usable grammar info.
        case = "nominative"
        gender = "m"
        try:
            parsed = morph.parse(prev_word)[0]
            case_tag = parsed.tag.case
            gender_tag = parsed.tag.gender
            if case_tag:
                case = PYMORPHY_TO_NUM2WORDS.get(case_tag, "nominative")
            if gender_tag:
                gender = PYMORPHY_TO_GENDER.get(gender_tag, "m")
        except Exception:
            # Deliberate best effort: a failed morphological analysis must
            # not abort text cleaning, so fall back to the defaults above.
            pass

        ordinal = convert_number(
            str(number), context_type="ordinal", case=case, gender=gender
        )
        # Reuse the original text up to the numeral (word + whitespace) so
        # that line breaks and multiple spaces matched by \s+ survive instead
        # of being collapsed into a single space.
        prefix = match.group(0)[: match.start(2) - match.start(0)]
        return f"{prefix}{ordinal}"

    # NOTE(review): (?i) also lets lowercase Latin words built only from the
    # letters i, v, x, l, c, d, m (e.g. "mix", "vi") reach roman_to_int;
    # confirm that matching lowercase numerals is intended.
    return re.sub(
        r"(?i)\b([А-Яа-яЁё]+)\s+([IVXLCDM]+)\b",
        replace_roman_match,
        text,
    )
def clean_response(text: str, language: str = "ru") -> str:
"""
Основная функция очистки.
@@ -408,9 +453,11 @@ def clean_response(text: str, language: str = "ru") -> str:
flags=re.IGNORECASE | re.MULTILINE,
)
# Convert numbers to words only for Russian, and only if digits exist
if language == "ru" and re.search(r"\d", text):
text = numbers_to_words(text)
# Convert Roman numerals and Arabic digits to words for Russian.
if language == "ru":
text = roman_numerals_to_words(text)
if re.search(r"\d", text):
text = numbers_to_words(text)
# Remove extra whitespace
text = re.sub(r"\n{3,}", "\n\n", text)