Fix TTS time phrases and STT cleanup

2026-03-15 16:22:00 +03:00
parent cb54a9ee75
commit 6add70fcd2
4 changed files with 146 additions and 2 deletions
--- a/app/core/cleaner.py
+++ b/app/core/cleaner.py
@@ -147,6 +147,73 @@ def numbers_to_words(text: str) -> str:

    preps_list = "|".join(map(re.escape, PREPOSITION_CASES.keys()))

+    # Times like "в 7:00" / "во 7:00" / "к 7:05" / "07:00" -> natural spoken Russian.
+    # Important: "в семь" (accusative, not "в семи"); "к семи" (dative).
+    def _minute_words(minute_val: int) -> str:
+        """Spell the minutes of a clock time: 0 -> "ровно", values below 10 get a leading "ноль"."""
+        if minute_val == 0:
+            return "ровно"
+        spoken = convert_number(
+            str(minute_val), context_type="cardinal", case="nominative", gender="m"
+        )
+        return spoken if minute_val >= 10 else "ноль " + spoken
+
+    def replace_time_match(match):
+        """Spell out a clock time that follows a preposition ("в", "во", "к", "ко").
+
+        ``match`` carries the preposition, hour and minutes in groups 1-3. Returns
+        the preposition, the hour converted with the case that preposition selects,
+        and the minutes as words; an out-of-range time is returned exactly as matched.
+        """
+        prep = match.group(1) or ""
+
+        try:
+            hour_val = int(match.group(2))
+            minute_val = int(match.group(3))
+        except (TypeError, ValueError):
+            return match.group(0)
+
+        if hour_val not in range(24) or minute_val not in range(60):
+            return match.group(0)
+
+        # "в"/"во" take the accusative ("в семь"), "к"/"ко" the dative ("к семи").
+        case_by_prep = {"в": "accusative", "во": "accusative", "к": "dative", "ко": "dative"}
+        hour_case = case_by_prep.get(prep.strip().lower(), "nominative")
+
+        hour_words = convert_number(str(hour_val), context_type="cardinal", case=hour_case, gender="m")
+
+        # Whatever whitespace separated preposition and time collapses to one space.
+        prefix = f"{prep} " if prep else ""
+        return f"{prefix}{hour_words} {_minute_words(minute_val)}"
+
+    def replace_time_no_prep_match(match):
+        """Spell out a bare clock time (no preposition) with the hour in the nominative.
+
+        Group 1 is the hour and group 2 the minutes; an out-of-range time is
+        returned exactly as matched.
+        """
+        try:
+            hour_val = int(match.group(1))
+            minute_val = int(match.group(2))
+        except (TypeError, ValueError):
+            return match.group(0)
+        if hour_val not in range(24) or minute_val not in range(60):
+            return match.group(0)
+
+        hour_words = convert_number(str(hour_val), context_type="cardinal", case="nominative", gender="m")
+        return f"{hour_words} {_minute_words(minute_val)}"
+
+    # The preposition pass must run first: it picks the hour's case from the
+    # preposition, and the bare pass below would otherwise claim the same digits.
+    prep_time_re = re.compile(r"(?i)\b(в|во|к|ко)\s+(\d{1,2})\s*:\s*(\d{2})\b")
+    text = prep_time_re.sub(replace_time_match, text)
+
+    # Remaining "H:MM" / "HH:MM" occurrences are read with a nominative hour.
+    # NOTE(review): "12:30:45" is matched as "12:30", leaving ":45" behind — confirm
+    # whether HH:MM:SS input can reach this point.
+    bare_time_re = re.compile(r"\b(\d{1,2})\s*:\s*(\d{2})\b")
+    text = bare_time_re.sub(replace_time_no_prep_match, text)
+
    # Года с суффиксом
    def replace_year_suffix_match(match):
        prep = match.group(1)