Files
smart-speaker/app/core/commands.py

67 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Command parsing helpers.
"""
import re
# Exact tokens accepted by the "strict" mode of is_stop_command().
# A token must equal one of these after normalization (lower-cased,
# punctuation stripped), so inflected forms are deliberately not listed.
_STOP_WORDS_STRICT = {
    # English
    "stop",
    # Russian imperatives / interjections
    "стоп",  # "stop"
    "хватит",  # "enough"
    "прекрати",  # "cease"
    "перестань",  # "quit it"
    "замолчи",  # "be quiet"
    "тихо",  # "quiet"
}
# Regular expressions used by the non-strict ("lenient") mode of
# is_stop_command(). Each pattern is anchored with \b on both sides so it
# only fires on whole words (or on a word stem followed by any ending via
# \w*), never on a fragment buried inside an unrelated word.
_STOP_PATTERNS_LENIENT = [
    r"\bстоп\w*\b",
    r"\bstop\b",
    r"\bхватит\b",
    r"\bперестан\w*\b",
    r"\bпрекрат\w*\b",
    # The leading "\bз" had been lost here, leaving an unanchored
    # backslash-escaped literal; restored to match the other stems.
    r"\bзамолч\w*\b",
    r"\bтише\b",
    r"\bтихо\b",
    r"\bвыключ\w*\b",
    r"\bотключ\w*\b",
    r"\bостанов\w*\b",
    r"\bотмен\w*\b",
    r"\bпауза\b",
    # The leading "\bд" had been lost here; without the left anchor the
    # pattern also matched "недостаточно" ("not enough"), which is not a
    # stop command.
    r"\bдостаточно\b",
]

# Compiled once at import time so is_stop_command() does not re-parse the
# patterns on every call.
_STOP_PATTERNS_LENIENT_COMPILED = [re.compile(p) for p in _STOP_PATTERNS_LENIENT]
def _normalize_text(text: str) -> str:
    """Return *text* in a canonical form for stop-word matching.

    The text is lower-cased, "ё" is folded to "е", every run of characters
    that are neither word characters nor whitespace is replaced by a single
    space, whitespace runs are collapsed to one space, and leading/trailing
    whitespace is removed.
    """
    lowered = text.lower().replace("ё", "е")
    # Punctuation becomes a space (not nothing) so "стоп,хватит" still
    # splits into two tokens.
    no_punct = re.sub(r"[^\w\s]+", " ", lowered, flags=re.UNICODE)
    collapsed = re.sub(r"\s+", " ", no_punct, flags=re.UNICODE)
    return collapsed.strip()
def is_stop_command(text: str, mode: str = "strict") -> bool:
    """Tell whether *text* contains a stop command.

    The text is normalized with ``_normalize_text`` before matching.

    Args:
        text: Recognized utterance. A falsy value, or one that normalizes
            to an empty string, is never a stop command.
        mode: ``"strict"`` accepts only whole tokens listed in
            ``_STOP_WORDS_STRICT``. Any other value (documented as
            ``"lenient"``) searches the normalized text with the broader
            regex patterns intended for noisy recognition.

    Returns:
        True if a stop word or pattern is found, False otherwise.
    """
    # Skip normalization entirely for falsy input.
    cleaned = _normalize_text(text) if text else ""
    if not cleaned:
        return False

    if mode == "strict":
        # True as soon as any whitespace-separated token is a known stop word.
        return not _STOP_WORDS_STRICT.isdisjoint(cleaned.split())

    # Every non-"strict" mode falls through to the lenient regex search.
    return any(regex.search(cleaned) for regex in _STOP_PATTERNS_LENIENT_COMPILED)