Update assistant features and docs
This commit is contained in:
@@ -12,6 +12,7 @@ Handles complex number-to-text conversion for Russian language.
|
||||
import re
|
||||
import pymorphy3
|
||||
from num2words import num2words
|
||||
from .roman import roman_to_int
|
||||
|
||||
# Initialize the morphological analyzer (used to determine grammatical case)
|
||||
morph = pymorphy3.MorphAnalyzer()
|
||||
@@ -334,6 +335,50 @@ def numbers_to_words(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def roman_numerals_to_words(text: str) -> str:
    """
    Convert Roman numerals that follow a Cyrillic word into ordinal words.

    The ordinal is requested from ``convert_number`` with the grammatical
    case and gender that the morphological analyzer reports for the
    preceding word; nominative masculine is used when no tag is available.

    Example: "Ивана III" -> "Ивана третьего".

    Args:
        text: Input text. A falsy value (empty string, None) yields "".

    Returns:
        The text with each "<Cyrillic word><whitespace><Roman numeral>"
        pair rewritten as the word followed by the ordinal. The whitespace
        between the two is kept exactly as written. Pairs whose numeral
        ``roman_to_int`` maps to None are left untouched.
    """
    if not text:
        return ""

    def replace_roman_match(match):
        prev_word = match.group(1)
        roman = match.group(2)

        number = roman_to_int(roman)
        if number is None:
            # Not a numeral the converter accepts: keep the original text.
            return match.group(0)

        # Defaults used when the previous word carries no case/gender tag.
        case = "nominative"
        gender = "m"

        try:
            parsed = morph.parse(prev_word)[0]
            case_tag = parsed.tag.case
            gender_tag = parsed.tag.gender

            if case_tag:
                case = PYMORPHY_TO_NUM2WORDS.get(case_tag, "nominative")
            if gender_tag:
                gender = PYMORPHY_TO_GENDER.get(gender_tag, "m")
        except Exception:
            # Best effort: if morphological analysis fails for any reason,
            # fall back to the nominative masculine defaults set above
            # instead of aborting the whole substitution.
            pass

        ordinal = convert_number(
            str(number), context_type="ordinal", case=case, gender=gender
        )

        # Re-insert the exact whitespace that separated the word from the
        # numeral, so line breaks, tabs and multiple spaces are not
        # collapsed into a single space.
        separator = match.string[match.end(1):match.start(2)]
        return f"{prev_word}{separator}{ordinal}"

    # NOTE(review): (?i) also lets the Latin numeral class match lowercase
    # letters (e.g. "mix"); whether such tokens are converted depends on
    # roman_to_int -- confirm this is intended.
    return re.sub(
        r"(?i)\b([А-Яа-яЁё]+)\s+([IVXLCDM]+)\b",
        replace_roman_match,
        text,
    )
|
||||
|
||||
|
||||
def clean_response(text: str, language: str = "ru") -> str:
|
||||
"""
|
||||
Основная функция очистки.
|
||||
@@ -408,9 +453,11 @@ def clean_response(text: str, language: str = "ru") -> str:
|
||||
flags=re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
|
||||
# Convert numbers to words only for Russian, and only if digits exist
|
||||
if language == "ru" and re.search(r"\d", text):
|
||||
text = numbers_to_words(text)
|
||||
# Convert Roman numerals and Arabic digits to words for Russian.
|
||||
if language == "ru":
|
||||
text = roman_numerals_to_words(text)
|
||||
if re.search(r"\d", text):
|
||||
text = numbers_to_words(text)
|
||||
|
||||
# Remove extra whitespace
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
|
||||
Reference in New Issue
Block a user