Fix TTS time phrases and STT cleanup

This commit is contained in:
2026-03-15 16:22:00 +03:00
parent cb54a9ee75
commit 6add70fcd2
4 changed files with 146 additions and 2 deletions

View File

@@ -147,6 +147,73 @@ def numbers_to_words(text: str) -> str:
preps_list = "|".join(map(re.escape, PREPOSITION_CASES.keys()))
# Время вида "в 7:00" / "во 7:00" / "к 7:05" / "07:00" -> человеческая русская форма.
# Важно: "в семь" (не "в семи"), "к семи" (дательный).
def _minute_words(minute_val: int) -> str:
    """Spell out the minutes part of a clock time for Russian speech.

    Zero minutes become the word "ровно" ("sharp"). Values below ten get the
    literal prefix "ноль " in front of the ``convert_number`` result, so that
    e.g. ":05" keeps its leading zero when spoken. Every other value is
    whatever ``convert_number`` returns for a nominative masculine cardinal.
    """
    if minute_val == 0:
        return "ровно"
    spoken = convert_number(
        str(minute_val),
        context_type="cardinal",
        case="nominative",
        gender="m",
    )
    return "ноль " + spoken if minute_val < 10 else spoken
def replace_time_match(match):
    """Regex callback: turn "<preposition> H:MM" into spoken Russian words.

    Reads the preposition from group 1, the hour digits from group 2 and the
    minute digits from group 3. The matched text is returned unchanged when
    the digits cannot be converted to integers or when the values fall
    outside 0-23 hours / 0-59 minutes.
    """
    preposition = match.group(1) or ""
    raw_hour, raw_minute = match.group(2), match.group(3)
    try:
        hours, minutes = int(raw_hour), int(raw_minute)
    except Exception:
        return match.group(0)
    if hours < 0 or hours > 23 or minutes < 0 or minutes > 59:
        return match.group(0)

    # The preposition dictates the grammatical case of the hour numeral:
    # "в"/"во" take the accusative, "к"/"ко" the dative; anything else
    # falls back to the nominative.
    case_by_preposition = {
        "в": "accusative",
        "во": "accusative",
        "к": "dative",
        "ко": "dative",
    }
    hour_case = case_by_preposition.get(preposition.strip().lower(), "nominative")

    spoken_hours = convert_number(
        str(hours),
        context_type="cardinal",
        case=hour_case,
        gender="m",
    )
    spoken_minutes = _minute_words(minutes)
    if preposition:
        # The preposition is emitted exactly as matched (original letter case).
        return f"{preposition} {spoken_hours} {spoken_minutes}"
    return f"{spoken_hours} {spoken_minutes}"
def replace_time_no_prep_match(match):
    """Regex callback: turn a bare "H:MM" into spoken Russian words.

    Reads the hour digits from group 1 and the minute digits from group 2.
    The matched text is returned unchanged when the digits cannot be
    converted to integers or when the values fall outside 0-23 hours /
    0-59 minutes. With no preposition present, the hour is requested in the
    nominative case.
    """
    raw_hour, raw_minute = match.group(1), match.group(2)
    try:
        hours, minutes = int(raw_hour), int(raw_minute)
    except Exception:
        return match.group(0)
    if hours < 0 or hours > 23 or minutes < 0 or minutes > 59:
        return match.group(0)

    spoken_hours = convert_number(
        str(hours),
        context_type="cardinal",
        case="nominative",
        gender="m",
    )
    spoken_minutes = _minute_words(minutes)
    return f"{spoken_hours} {spoken_minutes}"
text = re.sub(
r"(?i)\b(в|во|к|ко)\s+(\d{1,2})\s*:\s*(\d{2})\b",
replace_time_match,
text,
)
text = re.sub(
r"\b(\d{1,2})\s*:\s*(\d{2})\b",
replace_time_no_prep_match,
text,
)
# Года с суффиксом
def replace_year_suffix_match(match):
prep = match.group(1)