Fix TTS time phrases and STT cleanup
This commit is contained in:
@@ -147,6 +147,73 @@ def numbers_to_words(text: str) -> str:
|
||||
|
||||
# Regex alternation ("a|b|c") built from the keys of PREPOSITION_CASES,
# each passed through re.escape so it is matched literally.
preps_list = "|".join(map(re.escape, PREPOSITION_CASES.keys()))
|
||||
|
||||
# Время вида "в 7:00" / "во 7:00" / "к 7:05" / "07:00" -> человеческая русская форма.
|
||||
# Важно: "в семь" (не "в семи"), "к семи" (дательный).
|
||||
def _minute_words(minute_val: int) -> str:
    """Return the spoken form of the minutes part of a clock time.

    * ``0`` is read as "ровно".
    * Any other value below ten is read with a leading "ноль ".
    * Everything else is the plain nominative masculine cardinal.
    """
    if minute_val == 0:
        return "ровно"

    spoken = convert_number(
        str(minute_val), context_type="cardinal", case="nominative", gender="m"
    )
    # Single-digit minutes are spoken with a leading zero ("ноль пять").
    return spoken if minute_val >= 10 else "ноль " + spoken
|
||||
|
||||
def replace_time_match(match):
    """Spell out a clock time that is preceded by a preposition.

    Regex substitution callback. The match must expose the preposition in
    group 1, the hour digits in group 2 and the minute digits in group 3.

    The hour is declined according to the preposition: accusative after
    "в"/"во", dative after "к"/"ко", nominative for anything else. The
    minutes are rendered by ``_minute_words``. The preposition is emitted
    exactly as it was written, followed by a single space.

    Returns:
        The replacement text, or the matched text unchanged when the digits
        cannot be parsed or fall outside 00:00-23:59.
    """
    prep = match.group(1) or ""

    try:
        hour_val = int(match.group(2))
        minute_val = int(match.group(3))
    except (TypeError, ValueError):
        # int() reports a non-numeric string with ValueError and a missing
        # group (None) with TypeError; anything else is a real bug and
        # should not be silently swallowed.
        return match.group(0)

    # Not a valid time of day: leave the text for other rules to handle.
    if not (0 <= hour_val <= 23 and 0 <= minute_val <= 59):
        return match.group(0)

    case_by_prep = {
        "в": "accusative",
        "во": "accusative",
        "к": "dative",
        "ко": "dative",
    }
    # Lookup is case-insensitive; unknown prepositions fall back to nominative.
    hour_case = case_by_prep.get(prep.strip().lower(), "nominative")

    hour_words = convert_number(
        str(hour_val), context_type="cardinal", case=hour_case, gender="m"
    )
    minute_words = _minute_words(minute_val)

    prefix = f"{prep} " if prep else ""
    return f"{prefix}{hour_words} {minute_words}"
|
||||
|
||||
def replace_time_no_prep_match(match):
    """Spell out a bare clock time that has no preposition in front of it.

    Regex substitution callback. The match must expose the hour digits in
    group 1 and the minute digits in group 2.

    The hour is rendered as a nominative masculine cardinal and the minutes
    are rendered by ``_minute_words``.

    Returns:
        The replacement text, or the matched text unchanged when the digits
        cannot be parsed or fall outside 00:00-23:59.
    """
    try:
        hour_val = int(match.group(1))
        minute_val = int(match.group(2))
    except (TypeError, ValueError):
        # int() reports a non-numeric string with ValueError and a missing
        # group (None) with TypeError; anything else is a real bug and
        # should not be silently swallowed.
        return match.group(0)

    # Not a valid time of day (could be a ratio, a score, ...): leave as is.
    if not (0 <= hour_val <= 23 and 0 <= minute_val <= 59):
        return match.group(0)

    hour_words = convert_number(
        str(hour_val), context_type="cardinal", case="nominative", gender="m"
    )
    minute_words = _minute_words(minute_val)
    return f"{hour_words} {minute_words}"
|
||||
|
||||
# Order matters: times introduced by a preposition are rewritten first so the
# hour gets the right grammatical case; the bare HH:MM pattern then picks up
# whatever is left.
time_rules = (
    (r"(?i)\b(в|во|к|ко)\s+(\d{1,2})\s*:\s*(\d{2})\b", replace_time_match),
    (r"\b(\d{1,2})\s*:\s*(\d{2})\b", replace_time_no_prep_match),
)
for time_pattern, time_handler in time_rules:
    text = re.sub(time_pattern, time_handler, text)
|
||||
|
||||
# Года с суффиксом
|
||||
def replace_year_suffix_match(match):
|
||||
prep = match.group(1)
|
||||
|
||||
Reference in New Issue
Block a user