translator но без озвучивания слов на английском

This commit is contained in:
2026-01-09 01:01:27 +03:00
parent 53809c03f4
commit 242ead5355
11 changed files with 845 additions and 238 deletions

BIN
Apex-1.mp3 Normal file

Binary file not shown.

67
ai.py
View File

@@ -16,6 +16,10 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов. Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов.
ВАЖНО: Не используй в ответах панибратские или сленговые приветствия и обращения, такие как "Эй", "Хэй", "Слушай" в начале фразы и подобные.""" ВАЖНО: Не используй в ответах панибратские или сленговые приветствия и обращения, такие как "Эй", "Хэй", "Слушай" в начале фразы и подобные."""
# System prompt template used by translate_text(). The {source} and {target}
# placeholders are filled with language names through str.format().
TRANSLATION_SYSTEM_PROMPT = """You are a translation engine.
Translate from {source} to {target}.
Return only the translated text, without quotes, comments, or explanations."""
def ask_ai(messages_history: list) -> str: def ask_ai(messages_history: list) -> str:
""" """
@@ -72,3 +76,66 @@ def ask_ai(messages_history: list) -> str:
except (KeyError, IndexError) as e: except (KeyError, IndexError) as e:
print(f"❌ Ошибка парсинга ответа: {e}") print(f"❌ Ошибка парсинга ответа: {e}")
return "Не удалось обработать ответ от AI." return "Не удалось обработать ответ от AI."
def translate_text(text: str, source_lang: str, target_lang: str) -> str:
    """
    Translate text using Perplexity AI.

    Args:
        text: Text to translate.
        source_lang: Source language code ("ru" or "en"). Any other value is
            passed to the prompt unchanged.
        target_lang: Target language code ("ru" or "en"). Any other value is
            passed to the prompt unchanged.

    Returns:
        The translated text with surrounding whitespace removed, or a Russian
        user-facing message when the input is empty or the request or the
        response parsing fails.
    """
    # Whitespace-only input carries nothing to translate, so answer locally
    # instead of spending an API call on it.
    if not text or not text.strip():
        return "Извините, я не расслышал текст для перевода."

    lang_names = {"ru": "Russian", "en": "English"}
    source_name = lang_names.get(source_lang, source_lang)
    target_name = lang_names.get(target_lang, target_lang)
    print(f"🌍 Перевод: {source_name} -> {target_name}: {text[:60]}...")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    messages = [
        {
            "role": "system",
            "content": TRANSLATION_SYSTEM_PROMPT.format(
                source=source_name, target=target_name
            ),
        },
        {"role": "user", "content": text},
    ]
    payload = {
        "model": PERPLEXITY_MODEL,
        "messages": messages,
        "max_tokens": 400,
        "temperature": 0.2,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()
        data = response.json()
        translated = data["choices"][0]["message"]["content"]
        return translated.strip()
    except requests.exceptions.Timeout:
        return "Извините, сервер не отвечает. Попробуйте позже."
    except requests.exceptions.RequestException as e:
        print(f"❌ Ошибка API перевода: {e}")
        return "Произошла ошибка при переводе. Попробуйте ещё раз."
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
        # TypeError/AttributeError: the payload has an unexpected shape or a
        # null "content". ValueError: the body is not valid JSON on requests
        # versions whose JSON error is not a RequestException.
        print(f"❌ Ошибка парсинга ответа перевода: {e}")
        return "Не удалось обработать перевод."

194
alarm.py Normal file
View File

@@ -0,0 +1,194 @@
"""
Alarm clock module.
Handles alarm scheduling, persistence, and playback.
"""
import json
import time
import subprocess
import re
import threading
from datetime import datetime
from pathlib import Path
from config import BASE_DIR
from local_stt import listen_for_keywords
# JSON file the alarm list is loaded from and saved to.
ALARM_FILE = BASE_DIR / "alarms.json"
# Audio file played in a loop while an alarm is ringing.
ALARM_SOUND = BASE_DIR / "Apex-1.mp3"
class AlarmClock:
def __init__(self):
self.alarms = []
self.load_alarms()
def load_alarms(self):
"""Load alarms from JSON file."""
if ALARM_FILE.exists():
try:
with open(ALARM_FILE, "r", encoding="utf-8") as f:
self.alarms = json.load(f)
except Exception as e:
print(f"❌ Ошибка загрузки будильников: {e}")
self.alarms = []
def save_alarms(self):
"""Save alarms to JSON file."""
try:
with open(ALARM_FILE, "w", encoding="utf-8") as f:
json.dump(self.alarms, f, indent=4)
except Exception as e:
print(f"❌ Ошибка сохранения будильников: {e}")
def add_alarm(self, hour: int, minute: int):
"""Add a new alarm."""
# Check if already exists
for alarm in self.alarms:
if alarm["hour"] == hour and alarm["minute"] == minute:
alarm["active"] = True
self.save_alarms()
return
self.alarms.append({
"hour": hour,
"minute": minute,
"active": True
})
self.save_alarms()
print(f"⏰ Будильник установлен на {hour:02d}:{minute:02d}")
def cancel_all_alarms(self):
"""Cancel all active alarms."""
for alarm in self.alarms:
alarm["active"] = False
self.save_alarms()
print("🔕 Все будильники отменены.")
def check_alarms(self):
"""Check if any alarm should trigger now. Returns True if triggered."""
now = datetime.now()
triggered = False
for alarm in self.alarms:
if alarm["active"]:
if alarm["hour"] == now.hour and alarm["minute"] == now.minute:
# Prevent re-triggering within the same minute?
# We should disable it immediately or track last trigger time.
# For simple logic: disable it (one-time alarm).
# But wait, checking every second?
# If I disable it, it won't ring for the whole minute.
# Correct.
print(f"⏰ ВРЕМЯ БУДИЛЬНИКА: {alarm['hour']:02d}:{alarm['minute']:02d}")
alarm["active"] = False
triggered = True
self.trigger_alarm()
break # Trigger one at a time
if triggered:
self.save_alarms()
return True
return False
def trigger_alarm(self):
"""Play alarm sound and wait for stop command."""
print("🔔 БУДИЛЬНИК ЗВОНИТ! (Скажите 'Стоп' или 'Александр стоп')")
# Start playing sound in loop
# -q for quiet (no output)
# --loop -1 for infinite loop
cmd = ["mpg123", "-q", "--loop", "-1", str(ALARM_SOUND)]
try:
process = subprocess.Popen(cmd)
except FileNotFoundError:
print("❌ Ошибка: mpg123 не найден. Установите его: sudo apt install mpg123")
return
try:
# Listen for stop command using local Vosk
# Loop until stop word is heard
stop_words = ["стоп", "хватит", "тихо", "замолчи", "отмена", "александр стоп"]
while True:
# Listen in short bursts to be responsive
text = listen_for_keywords(stop_words, timeout=3.0)
if text:
print(f"🛑 Будильник остановлен по команде: '{text}'")
break
except Exception as e:
print(f"❌ Ошибка во время будильника: {e}")
finally:
# Kill the player
process.terminate()
try:
process.wait(timeout=1)
except subprocess.TimeoutExpired:
process.kill()
print("🔕 Будильник выключен.")
def parse_command(self, text: str) -> str | None:
"""
Parse user text for alarm commands.
Returns response string if command handled, None otherwise.
"""
text = text.lower()
if "будильник" not in text and "разбуди" not in text:
return None
if "отмени" in text:
self.cancel_all_alarms()
return "Хорошо, я отменил все будильники."
# Regex to find time: HH:MM, HH-MM, HH MM, HH часов MM минут
# 1. "07:30", "7:30"
match = re.search(r'\b(\d{1,2})[:.-](\d{2})\b', text)
if match:
h, m = int(match.group(1)), int(match.group(2))
if 0 <= h <= 23 and 0 <= m <= 59:
self.add_alarm(h, m)
return f"Я установил будильник на {h} часов {m} минут."
# 2. "7 часов 30 минут" or "7 30"
# Search for pattern: digits ... (digits)?
# Complex to separate from other numbers.
# Simple heuristics:
words = text.split()
nums = [int(s) for s in text.split() if s.isdigit()]
# "на 7" -> 7:00
if "на" in words or "в" in words:
# Try to find number after preposition
pass
# Let's rely on explicit digit search if regex failed
# Patterns: "на 8", "на 8 30", "на 8 часов 30 минут", "на 8 часов"
# Regex to capture hour and optional minute
# Matches: "на <H> [часов] [M] [минут]"
match_time = re.search(r'на\s+(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?', text)
if match_time:
h = int(match_time.group(1))
m = int(match_time.group(2)) if match_time.group(2) else 0
# Handle AM/PM if specified
if "вечера" in text and h < 12:
h += 12
elif "утра" in text and h == 12:
h = 0
if 0 <= h <= 23 and 0 <= m <= 59:
self.add_alarm(h, m)
return f"Хорошо, разбужу вас в {h}:{m:02d}."
return "Я не понял время для будильника. Пожалуйста, скажите точное время, например 'семь тридцать'."
# Shared AlarmClock, created lazily by get_alarm_clock()
_alarm_clock = None


def get_alarm_clock():
    """Return the module-wide AlarmClock, creating it on the first call."""
    global _alarm_clock
    if _alarm_clock is not None:
        return _alarm_clock
    _alarm_clock = AlarmClock()
    return _alarm_clock

12
alarms.json Normal file
View File

@@ -0,0 +1,12 @@
[
{
"hour": 10,
"minute": 15,
"active": true
},
{
"hour": 3,
"minute": 42,
"active": false
}
]

View File

@@ -3,6 +3,7 @@ Response cleaner module.
Removes markdown formatting and special characters from AI responses. Removes markdown formatting and special characters from AI responses.
Handles complex number-to-text conversion for Russian language. Handles complex number-to-text conversion for Russian language.
""" """
import re import re
import pymorphy3 import pymorphy3
from num2words import num2words from num2words import num2words
@@ -12,79 +13,86 @@ morph = pymorphy3.MorphAnalyzer()
# Preposition to case mapping (simplified heuristics) # Preposition to case mapping (simplified heuristics)
PREPOSITION_CASES = { PREPOSITION_CASES = {
'в': 'loct', # Prepositional (Locative 2) or Accusative. 'v godu' -> loct "в": "loct", # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
'во': 'loct', "во": "loct",
'на': 'accs', # Dates: 'na 5 maya' -> Accusative (na pyatoe) "на": "accs", # Dates: 'na 5 maya' -> Accusative (na pyatoe)
'о': 'loct', "о": "loct",
'об': 'loct', "об": "loct",
'обо': 'loct', "обо": "loct",
'при': 'loct', "при": "loct",
'у': 'gent', "у": "gent",
'от': 'gent', "от": "gent",
'до': 'gent', "до": "gent",
'из': 'gent', "из": "gent",
'с': 'gent', # or ablt (instrumental) "с": "gent", # or ablt (instrumental)
'со': 'gent', "со": "gent",
'без': 'gent', "без": "gent",
'для': 'gent', "для": "gent",
'вокруг': 'gent', "вокруг": "gent",
'после': 'gent', "после": "gent",
'к': 'datv', "к": "datv",
'ко': 'datv', "ко": "datv",
'по': 'datv', # or accs for dates (limit). Heuristic: datv defaults usually. "по": "datv", # or accs for dates (limit). Heuristic: datv defaults usually.
'над': 'ablt', "над": "ablt",
'под': 'ablt', "под": "ablt",
'перед': 'ablt', "перед": "ablt",
'за': 'ablt', # or acc "за": "ablt", # or acc
'между': 'ablt', "между": "ablt",
} }
# Mapping pymorphy cases to num2words cases # Mapping pymorphy cases to num2words cases
PYMORPHY_TO_NUM2WORDS = { PYMORPHY_TO_NUM2WORDS = {
'nomn': 'nominative', "nomn": "nominative",
'gent': 'genitive', "gent": "genitive",
'datv': 'dative', "datv": "dative",
'accs': 'accusative', "accs": "accusative",
'ablt': 'instrumental', "ablt": "instrumental",
'loct': 'prepositional', "loct": "prepositional",
'voct': 'nominative', # Fallback "voct": "nominative", # Fallback
'gen2': 'genitive', "gen2": "genitive",
'acc2': 'accusative', "acc2": "accusative",
'loc2': 'prepositional', "loc2": "prepositional",
} }
# Month names in Genitive case (as they appear in dates) # Month names in Genitive case (as they appear in dates)
MONTHS_GENITIVE = [ MONTHS_GENITIVE = [
'января', 'февраля', 'марта', 'апреля', 'мая', 'июня', "января",
'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря' "февраля",
"марта",
"апреля",
"мая",
"июня",
"июля",
"августа",
"сентября",
"октября",
"ноября",
"декабря",
] ]
def get_case_from_preposition(prep_token): def get_case_from_preposition(prep_token):
"""Return pymorphy case based on preposition.""" """Return pymorphy case based on preposition."""
if not prep_token: if not prep_token:
return None return None
return PREPOSITION_CASES.get(prep_token.lower()) return PREPOSITION_CASES.get(prep_token.lower())
def convert_number(number_str, context_type='cardinal', case='nominative', gender='m'):
def convert_number(number_str, context_type="cardinal", case="nominative", gender="m"):
"""Convert a number string to words with specific parameters.""" """Convert a number string to words with specific parameters."""
try: try:
# Handle floats # Handle floats
if '.' in number_str or ',' in number_str: if "." in number_str or "," in number_str:
num_val = float(number_str.replace(',', '.')) num_val = float(number_str.replace(",", "."))
else: else:
num_val = int(number_str) num_val = int(number_str)
return num2words( return num2words(num_val, lang="ru", to=context_type, case=case, gender=gender)
num_val,
lang='ru',
to=context_type,
case=case,
gender=gender
)
except Exception as e: except Exception as e:
print(f"Error converting number {number_str}: {e}") print(f"Error converting number {number_str}: {e}")
return number_str return number_str
def numbers_to_words(text: str) -> str: def numbers_to_words(text: str) -> str:
""" """
Intelligent conversion of digits in text to Russian words. Intelligent conversion of digits in text to Russian words.
@@ -96,59 +104,65 @@ def numbers_to_words(text: str) -> str:
# 1. Identify "Year" patterns: "1999 год", "в 2024 году" # 1. Identify "Year" patterns: "1999 год", "в 2024 году"
def replace_year_match(match): def replace_year_match(match):
full_str = match.group(0) full_str = match.group(0)
prep = match.group(1) # Could be None prep = match.group(1) # Could be None
year_str = match.group(2) year_str = match.group(2)
year_word = match.group(3) # год, году, года... year_word = match.group(3) # год, году, года...
parsed = morph.parse(year_word)[0] parsed = morph.parse(year_word)[0]
case_tag = parsed.tag.case case_tag = parsed.tag.case
if prep and prep.strip().lower() in ['в', 'во'] and case_tag in ['accs', 'nomn']:
pass
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative') if (
prep
words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m') and prep.strip().lower() in ["в", "во"]
and case_tag in ["accs", "nomn"]
):
pass
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, "nominative")
words = convert_number(
year_str, context_type="ordinal", case=nw_case, gender="m"
)
prefix = f"{prep} " if prep else "" prefix = f"{prep} " if prep else ""
return f"{prefix}{words} {year_word}" return f"{prefix}{words} {year_word}"
text = re.sub( text = re.sub(
r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b', r"(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b",
replace_year_match, replace_year_match,
text text,
) )
# 2. Identify "Date" patterns: "25 июня", "с 1 мая" # 2. Identify "Date" patterns: "25 июня", "с 1 мая"
# Matches: (Preposition)? (Day) (Month_Genitive) # Matches: (Preposition)? (Day) (Month_Genitive)
# Day is usually 1-31. # Day is usually 1-31.
month_regex = '|'.join(MONTHS_GENITIVE) month_regex = "|".join(MONTHS_GENITIVE)
def replace_date_match(match): def replace_date_match(match):
prep = match.group(1) prep = match.group(1)
day_str = match.group(2) day_str = match.group(2)
month_word = match.group(3) month_word = match.group(3)
# Determine case # Determine case
# Default to Genitive ("25 июня" -> "двадцать пятого июня") # Default to Genitive ("25 июня" -> "двадцать пятого июня")
case = 'genitive' case = "genitive"
if prep: if prep:
prep_clean = prep.strip().lower() prep_clean = prep.strip().lower()
# Specific overrides for dates # Specific overrides for dates
if prep_clean == 'на': if prep_clean == "на":
case = 'accusative' # на 5 мая -> на пятое case = "accusative" # на 5 мая -> на пятое
elif prep_clean == 'по': elif prep_clean == "по":
case = 'accusative' # по 5 мая -> по пятое (limit) case = "accusative" # по 5 мая -> по пятое (limit)
elif prep_clean == 'к': elif prep_clean == "к":
case = 'dative' # к 5 мая -> к пятому case = "dative" # к 5 мая -> к пятому
elif prep_clean in ['с', 'до', 'от']: elif prep_clean in ["с", "до", "от"]:
case = 'genitive' # с 5 мая -> с пятого case = "genitive" # с 5 мая -> с пятого
else: else:
# Fallback to general preposition map # Fallback to general preposition map
morph_case = get_case_from_preposition(prep_clean) morph_case = get_case_from_preposition(prep_clean)
if morph_case: if morph_case:
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive') case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "genitive")
# Convert to Ordinal # Convert to Ordinal
# Dates are neuter ("число" implies neuter: "пятое", "пятого") # Dates are neuter ("число" implies neuter: "пятое", "пятого")
@@ -156,112 +170,119 @@ def numbers_to_words(text: str) -> str:
# 5, ordinal, genitive -> "пятого" (masc/neut are same) # 5, ordinal, genitive -> "пятого" (masc/neut are same)
# 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?) # 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?)
# Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому). # Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому).
words = convert_number(day_str, context_type='ordinal', case=case, gender='n') words = convert_number(day_str, context_type="ordinal", case=case, gender="n")
prefix = f"{prep} " if prep else "" prefix = f"{prep} " if prep else ""
return f"{prefix}{words} {month_word}" return f"{prefix}{words} {month_word}"
text = re.sub( text = re.sub(
r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b', r"(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(" + month_regex + r")\b",
replace_date_match, replace_date_match,
text text,
) )
# 3. Handle remaining numbers (Cardinals) # 3. Handle remaining numbers (Cardinals)
def replace_cardinal_match(match): def replace_cardinal_match(match):
prep = match.group(1) prep = match.group(1)
num_str = match.group(2) num_str = match.group(2)
case = 'nominative' case = "nominative"
if prep: if prep:
morph_case = get_case_from_preposition(prep.strip()) morph_case = get_case_from_preposition(prep.strip())
if morph_case: if morph_case:
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative') case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "nominative")
words = convert_number(num_str, context_type='cardinal', case=case) words = convert_number(num_str, context_type="cardinal", case=case)
prefix = f"{prep} " if prep else "" prefix = f"{prep} " if prep else ""
return f"{prefix}{words}" return f"{prefix}{words}"
text = re.sub( text = re.sub(
r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b', r"(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b",
replace_cardinal_match, replace_cardinal_match,
text text,
) )
return text return text
def clean_response(text: str, language: str = "ru") -> str:
    """
    Clean AI response from markdown formatting and special characters.

    Block-level constructs (fenced code, horizontal rules, list markers) are
    removed before inline emphasis so that their ``*``, ``_`` and backtick
    characters are not consumed by the emphasis and inline-code patterns.

    Args:
        text: Raw AI response with possible markdown.
        language: Target language for output. Digits are converted to words
            only when it is "ru".

    Returns:
        Clean text suitable for TTS ("" for empty input).
    """
    if not text:
        return ""

    # Fenced code blocks first: the inline-code pattern below would otherwise
    # strip the inner pair of backticks and leave the fence unmatched.
    text = re.sub(r"```[\s\S]*?```", "", text)

    # Citation references like [1], [citation needed], [source].
    # Hex escapes are used for the brackets to avoid escaping issues.
    text = re.sub(r"\x5B\d+\x5D", "", text)
    text = re.sub(r"\x5Bcitation\s*needed\x5D", "", text, flags=re.IGNORECASE)
    text = re.sub(r"\x5Bsource\x5D", "", text, flags=re.IGNORECASE)

    # Horizontal rules and list markers, before emphasis handling so that a
    # "***" / "___" rule or a "* item" bullet is not read as emphasis.
    text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*\d+\.\s+", "", text, flags=re.MULTILINE)

    # Bold **text** / __text__, italic *text* / _text_, strikethrough ~~text~~
    text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
    text = re.sub(r"__(.+?)__", r"\1", text)
    text = re.sub(r"\*(.+?)\*", r"\1", text)
    text = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"\1", text)
    text = re.sub(r"~~(.+?)~~", r"\1", text)

    # Headers: # ## ### etc.
    text = re.sub(r"^#{1,6}\s*", "", text, flags=re.MULTILINE)

    # Images ![alt](url) are dropped entirely. This must precede the link
    # rule, which would otherwise turn an image into "!alt".
    text = re.sub(r"!\x5B([^\x5D]*)\x5D\([^)]+\)", "", text)
    # Links [text](url) -> text
    text = re.sub(r"\x5B([^\x5D]+)\x5D\([^)]+\)", r"\1", text)

    # Inline code `code` -> code
    text = re.sub(r"`([^`]+)`", r"\1", text)

    # Blockquote markers
    text = re.sub(r"^\s*>\s*", "", text, flags=re.MULTILINE)

    # HTML tags, if any
    text = re.sub(r"<[^>]+>", "", text)

    # Informal greetings/fillers at the start of a line. The \b makes sure
    # only whole words are removed ("Ну, ..." but not the start of "Нужно").
    text = re.sub(
        r"^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)\b[,!?:]?\s*",
        "",
        text,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Convert numbers to words only for Russian, and only if digits exist
    if language == "ru" and re.search(r"\d", text):
        text = numbers_to_words(text)

    # Collapse extra whitespace
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = re.sub(r" +", " ", text)

    return text.strip()

View File

@@ -2,6 +2,7 @@
Configuration module for smart speaker. Configuration module for smart speaker.
Loads environment variables from .env file. Loads environment variables from .env file.
""" """
import os import os
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -31,6 +32,13 @@ VOSK_MODEL_PATH = BASE_DIR / "vosk-model-ru-0.42"
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000
CHANNELS = 1 CHANNELS = 1
# Set timezone to Moscow
import time

os.environ["TZ"] = "Europe/Moscow"
# time.tzset() is only available on Unix. Guard the call so that importing
# this module does not raise AttributeError on other platforms.
if hasattr(time, "tzset"):
    time.tzset()
# TTS configuration # TTS configuration
TTS_SPEAKER = "eugene" # Available: aidar, baya, kseniya, xenia, eugene TTS_SPEAKER = "eugene" # Available (ru): aidar, baya, kseniya, xenia, eugene
TTS_EN_SPEAKER = os.getenv("TTS_EN_SPEAKER", "en_0")
TTS_SAMPLE_RATE = 48000 TTS_SAMPLE_RATE = 48000

116
local_stt.py Normal file
View File

@@ -0,0 +1,116 @@
"""
Local offline Speech-to-Text module using Vosk.
Used for simple command detection (like "stop") without internet.
"""
import os
import sys
import json
import pyaudio
from vosk import Model, KaldiRecognizer
from config import VOSK_MODEL_PATH, SAMPLE_RATE
class LocalRecognizer:
    """
    Offline keyword listener built on Vosk and PyAudio.

    The model, recognizer and PyAudio handle are created lazily by
    ``initialize()``; a microphone stream is opened for each listening call.
    """

    # Number of frames read from the microphone per iteration.
    CHUNK_FRAMES = 4096

    def __init__(self):
        self.model = None
        self.rec = None
        self.pa = None
        self.stream = None

    @staticmethod
    def _load_model_quietly():
        """Load the Vosk model with stderr silenced, always restoring stderr."""
        sys.stderr.flush()
        saved_stderr = os.dup(2)
        try:
            null_fd = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(null_fd, 2)
            finally:
                os.close(null_fd)
            return Model(str(VOSK_MODEL_PATH))
        finally:
            # Restore fd 2 even when loading fails, otherwise every later
            # error message of the process would vanish into /dev/null.
            os.dup2(saved_stderr, 2)
            os.close(saved_stderr)

    def initialize(self):
        """
        Create the model, recognizer and audio handle.

        Returns:
            True on success, False if the model directory is missing or any
            component fails to initialize.
        """
        if not os.path.exists(VOSK_MODEL_PATH):
            print(f"❌ Ошибка: Vosk модель не найдена по пути {VOSK_MODEL_PATH}")
            return False

        print("📦 Инициализация локального STT (Vosk)...")
        try:
            # Reuse an already loaded model (e.g. after cleanup()).
            if self.model is None:
                self.model = self._load_model_quietly()
            self.rec = KaldiRecognizer(self.model, SAMPLE_RATE)
            if self.pa is None:
                self.pa = pyaudio.PyAudio()
        except Exception as e:
            print(f"Error initializing Vosk: {e}")
            return False
        return True

    def listen_for_keywords(self, keywords: list, timeout: float = 10.0) -> str:
        """
        Listen on the microphone until one of *keywords* is recognized.

        Args:
            keywords: Substrings to look for in the recognized text.
            timeout: Maximum listening time in seconds.

        Returns:
            The recognized text (final or partial) that contains a keyword,
            or an empty string on timeout or when audio/model setup fails.
        """
        import time

        if self.model is None or self.rec is None or self.pa is None:
            if not self.initialize():
                return ""

        # Drop audio state left over from a previous session; a stale partial
        # result could otherwise be reported as a fresh detection.
        self.rec.Reset()

        stream = None
        try:
            stream = self.pa.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=SAMPLE_RATE,
                input=True,
                frames_per_buffer=self.CHUNK_FRAMES,
            )
            stream.start_stream()
        except Exception as e:
            print(f"❌ Ошибка микрофона: {e}")
            if stream is not None:
                stream.close()
            return ""

        print(f"👂 Локальное слушание ожидает: {keywords}")
        detected_text = ""
        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
        deadline = time.monotonic() + timeout
        try:
            while time.monotonic() < deadline:
                data = stream.read(self.CHUNK_FRAMES, exception_on_overflow=False)
                if self.rec.AcceptWaveform(data):
                    # A complete utterance was recognized.
                    candidate = json.loads(self.rec.Result()).get("text", "")
                    if candidate:
                        print(f"📝 Локально: {candidate}")
                else:
                    # Utterance still in progress: inspect the partial result.
                    candidate = json.loads(self.rec.PartialResult()).get("partial", "")

                if candidate and any(kw in candidate for kw in keywords):
                    detected_text = candidate
                    break
        finally:
            stream.stop_stream()
            stream.close()
        return detected_text

    def cleanup(self):
        """Release the PyAudio handle; safe to call more than once."""
        if self.pa is not None:
            self.pa.terminate()
            # Forget the terminated handle so a later listen re-creates it.
            self.pa = None
# Shared LocalRecognizer, created lazily by get_local_recognizer()
_local_recognizer = None


def get_local_recognizer():
    """Return the module-wide LocalRecognizer, creating it on the first call."""
    global _local_recognizer
    if _local_recognizer is not None:
        return _local_recognizer
    _local_recognizer = LocalRecognizer()
    return _local_recognizer
def listen_for_keywords(keywords: list, timeout: float = 5.0) -> str:
    """Delegate keyword listening to the shared LocalRecognizer instance."""
    recognizer = get_local_recognizer()
    return recognizer.listen_for_keywords(keywords, timeout)

173
main.py
View File

@@ -13,14 +13,22 @@ Flow:
import signal import signal
import sys import sys
import re
import threading
from collections import deque from collections import deque
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once from wakeword import (
wait_for_wakeword,
cleanup as cleanup_wakeword,
check_wakeword_once,
stop_monitoring as stop_wakeword_monitoring,
)
from stt import listen, cleanup as cleanup_stt, get_recognizer from stt import listen, cleanup as cleanup_stt, get_recognizer
from ai import ask_ai from ai import ask_ai, translate_text
from cleaner import clean_response from cleaner import clean_response
from tts import speak, initialize as init_tts from tts import speak, initialize as init_tts
from sound_level import set_volume, parse_volume_text from sound_level import set_volume, parse_volume_text
from alarm import get_alarm_clock
def signal_handler(sig, frame): def signal_handler(sig, frame):
@@ -31,6 +39,37 @@ def signal_handler(sig, frame):
sys.exit(0) sys.exit(0)
def parse_translation_request(text: str):
"""
Detect translation commands and extract language direction and text.
Returns:
dict with source_lang, target_lang, text or None
"""
patterns = [
(r"^переведи на английский\s*(.*)$", "ru", "en"),
(r"^переведи на русский\s*(.*)$", "en", "ru"),
(r"^переведи с английского\s*(.*)$", "en", "ru"),
(r"^переведи с русского\s*(.*)$", "ru", "en"),
(r"^как по[-\s]?английски\s*(.*)$", "ru", "en"),
(r"^как по[-\s]?русски\s*(.*)$", "en", "ru"),
(r"^translate (?:to|into) english\s*(.*)$", "ru", "en"),
(r"^translate (?:to|into) russian\s*(.*)$", "en", "ru"),
(r"^translate from english\s*(.*)$", "en", "ru"),
(r"^translate from russian\s*(.*)$", "ru", "en"),
]
for pattern, source_lang, target_lang in patterns:
match = re.match(pattern, text, flags=re.IGNORECASE)
if match:
return {
"source_lang": source_lang,
"target_lang": target_lang,
"text": match.group(1).strip(),
}
return None
def main(): def main():
"""Main application loop.""" """Main application loop."""
print("=" * 50) print("=" * 50)
@@ -46,8 +85,31 @@ def main():
# Pre-initialize models (takes a few seconds) # Pre-initialize models (takes a few seconds)
print("⏳ Инициализация моделей...") print("⏳ Инициализация моделей...")
get_recognizer().initialize() # Initialize STT model first init_errors = []
init_tts() # Then initialize TTS model
def init_stt():
try:
get_recognizer().initialize()
except Exception as e:
init_errors.append(e)
def init_tts_model():
try:
init_tts()
except Exception as e:
init_errors.append(e)
stt_thread = threading.Thread(target=init_stt, daemon=True)
tts_thread = threading.Thread(target=init_tts_model, daemon=True)
stt_thread.start()
tts_thread.start()
stt_thread.join()
tts_thread.join()
if init_errors:
raise init_errors[0]
alarm_clock = get_alarm_clock() # Initialize Alarm Clock
print() print()
# Initialize chat history (last 10 exchanges = 20 messages) # Initialize chat history (last 10 exchanges = 20 messages)
@@ -57,37 +119,58 @@ def main():
skip_wakeword = False skip_wakeword = False
while True: while True:
try: try:
# Ensure wake word detector stream is closed before listening
stop_wakeword_monitoring()
# Check for alarms every loop iteration
if alarm_clock.check_alarms():
# If alarm triggered and finished (user stopped it), we continue loop
# The alarm.trigger_alarm() blocks until stopped.
skip_wakeword = False # Reset state after alarm
continue
# Step 1: Wait for wake word or Follow-up listen # Step 1: Wait for wake word or Follow-up listen
if not skip_wakeword: if not skip_wakeword:
wait_for_wakeword() # Wait with timeout to allow alarm checking
detected = wait_for_wakeword(timeout=1.0)
# If timeout (not detected), loop again to check alarms
if not detected:
continue
# Standard listen after activation # Standard listen after activation
user_text = listen(timeout_seconds=7.0) user_text = listen(timeout_seconds=7.0)
else: else:
# Follow-up listen (wait 2.0s for start, then listen long) # Follow-up listen (wait 5.0s for start)
print("👂 Слушаю продолжение диалога...") print("👂 Слушаю продолжение диалога (5 сек)...")
user_text = listen(timeout_seconds=20.0, detection_timeout=2.0) user_text = listen(timeout_seconds=10.0, detection_timeout=5.0)
if not user_text: if not user_text:
# User didn't continue conversation, go back to sleep # User didn't continue conversation, go back to sleep silently
skip_wakeword = False skip_wakeword = False
continue continue
# Reset flag for now (will be set to True if we speak successfully)
skip_wakeword = False
# Step 2: Check if speech was recognized # Step 2: Check if speech was recognized
if not user_text: if not user_text:
# If this was a direct wake word activation but no speech
speak("Извините, я вас не расслышал. Попробуйте ещё раз.") speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
skip_wakeword = False # Reset to wake word
continue continue
# Check for stop commands # Check for stop commands
user_text_lower = user_text.lower().strip() user_text_lower = user_text.lower().strip()
if user_text_lower in ["стоп", "александр", "стоп александр"]: if user_text_lower in ["стоп", "александр", "стоп александр", "хватит"]:
print("_" * 50) print("_" * 50)
print("💤 Жду 'Alexandr' для активации...") print("💤 Жду 'Alexandr' для активации...")
skip_wakeword = False skip_wakeword = False
continue continue
# Check for alarm commands
alarm_response = alarm_clock.parse_command(user_text)
if alarm_response:
speak(alarm_response)
continue
# Check for volume command # Check for volume command
if user_text.lower().startswith("громкость"): if user_text.lower().startswith("громкость"):
try: try:
@@ -113,21 +196,67 @@ def main():
speak("Не удалось изменить громкость.") speak("Не удалось изменить громкость.")
continue continue
# Check for translation commands
translation_request = parse_translation_request(user_text)
if translation_request:
source_lang = translation_request["source_lang"]
target_lang = translation_request["target_lang"]
text_to_translate = translation_request["text"]
if not text_to_translate:
prompt = (
"Скажи фразу на английском."
if source_lang == "en"
else "Скажи фразу на русском."
)
speak(prompt)
text_to_translate = listen(
timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
)
if not text_to_translate:
speak("Я не расслышал текст для перевода.")
skip_wakeword = False
continue
translated_text = translate_text(
text_to_translate, source_lang, target_lang
)
clean_text = clean_response(translated_text, language=target_lang)
completed = speak(
clean_text,
check_interrupt=check_wakeword_once,
language=target_lang,
)
stop_wakeword_monitoring()
skip_wakeword = True
if not completed:
print("⏹️ Перевод прерван - слушаю следующий вопрос")
continue
# Step 3: Send to AI # Step 3: Send to AI
# Add user message to history # Add user message to history
chat_history.append({"role": "user", "content": user_text}) chat_history.append({"role": "user", "content": user_text})
# Get response using history # Get response using history
ai_response = ask_ai(list(chat_history)) ai_response = ask_ai(list(chat_history))
# Add AI response to history # Add AI response to history
chat_history.append({"role": "assistant", "content": ai_response}) chat_history.append({"role": "assistant", "content": ai_response})
# Step 4: Clean response # Step 4: Clean response
clean_text = clean_response(ai_response) clean_text = clean_response(ai_response, language="ru")
# Step 5: Speak response (with wake word interrupt support) # Step 5: Speak response (with wake word interrupt support)
completed = speak(clean_text, check_interrupt=check_wakeword_once) # This uses check_wakeword_once which opens/closes stream as needed
completed = speak(
clean_text, check_interrupt=check_wakeword_once, language="ru"
)
# Stop monitoring after TTS finishes (cleanup stream opened by check_wakeword_once)
stop_wakeword_monitoring()
# Enable follow-up mode for next iteration # Enable follow-up mode for next iteration
skip_wakeword = True skip_wakeword = True
@@ -136,7 +265,12 @@ def main():
# but we can print a message # but we can print a message
if not completed: if not completed:
print("⏹️ Ответ прерван - слушаю следующий вопрос") print("⏹️ Ответ прерван - слушаю следующий вопрос")
continue # If interrupted, we treat it as immediate follow up?
# Usually interruption means "I have a new command"
# So skip_wakeword = True is correct.
# But we might want to listen IMMEDIATELY without waiting 5s for start?
# listen() handles that.
pass
print() print()
print("-" * 30) print("-" * 30)
@@ -149,6 +283,7 @@ def main():
except Exception as e: except Exception as e:
print(f"❌ Ошибка: {e}") print(f"❌ Ошибка: {e}")
speak("Произошла ошибка. Попробуйте ещё раз.") speak("Произошла ошибка. Попробуйте ещё раз.")
skip_wakeword = False
if __name__ == "__main__": if __name__ == "__main__":

62
stt.py
View File

@@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API.
Recognizes speech from microphone using streaming WebSocket. Recognizes speech from microphone using streaming WebSocket.
Supports Russian (default) and English. Supports Russian (default) and English.
""" """
import os import os
import asyncio import asyncio
import threading import threading
@@ -20,6 +21,7 @@ from deepgram import (
# Configure logging to suppress debug noise # Configure logging to suppress debug noise
logging.getLogger("deepgram").setLevel(logging.WARNING) logging.getLogger("deepgram").setLevel(logging.WARNING)
class SpeechRecognizer: class SpeechRecognizer:
"""Speech recognizer using Deepgram streaming.""" """Speech recognizer using Deepgram streaming."""
@@ -29,18 +31,18 @@ class SpeechRecognizer:
self.stream = None self.stream = None
self.transcript = "" self.transcript = ""
self.lock = threading.Lock() self.lock = threading.Lock()
def initialize(self): def initialize(self):
"""Initialize Deepgram client and PyAudio.""" """Initialize Deepgram client and PyAudio."""
if not DEEPGRAM_API_KEY: if not DEEPGRAM_API_KEY:
raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.") raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.")
print("📦 Инициализация Deepgram STT...") print("📦 Инициализация Deepgram STT...")
config = DeepgramClientOptions( config = DeepgramClientOptions(
verbose=logging.WARNING, verbose=logging.WARNING,
) )
self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, config) self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, config)
self.pa = pyaudio.PyAudio() self.pa = pyaudio.PyAudio()
print("✅ Deepgram клиент готов") print("✅ Deepgram клиент готов")
@@ -59,13 +61,14 @@ class SpeechRecognizer:
async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout): async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
"""Async loop to send audio and wait for results.""" """Async loop to send audio and wait for results."""
self.transcript = "" self.transcript = ""
transcript_parts = []
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
stream = self._get_stream() stream = self._get_stream()
stop_event = asyncio.Event() stop_event = asyncio.Event()
speech_started_event = asyncio.Event() speech_started_event = asyncio.Event()
# We need access to the outer 'self' (SpeechRecognizer instance) # We need access to the outer 'self' (SpeechRecognizer instance)
speech_recognizer_self = self speech_recognizer_self = self
@@ -74,9 +77,11 @@ class SpeechRecognizer:
if len(sentence) == 0: if len(sentence) == 0:
return return
if result.is_final: if result.is_final:
print(f"📝 Частичный результат: {sentence}")
with speech_recognizer_self.lock: with speech_recognizer_self.lock:
speech_recognizer_self.transcript = sentence transcript_parts.append(sentence)
speech_recognizer_self.transcript = " ".join(
transcript_parts
).strip()
def on_speech_started(unused_self, speech_started, **kwargs): def on_speech_started(unused_self, speech_started, **kwargs):
loop.call_soon_threadsafe(speech_started_event.set) loop.call_soon_threadsafe(speech_started_event.set)
@@ -102,7 +107,7 @@ class SpeechRecognizer:
channels=1, channels=1,
sample_rate=SAMPLE_RATE, sample_rate=SAMPLE_RATE,
interim_results=True, interim_results=True,
utterance_end_ms="1200", utterance_end_ms=1200,
vad_events=True, vad_events=True,
) )
@@ -133,38 +138,45 @@ class SpeechRecognizer:
print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}") print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}")
sender_task = asyncio.create_task(send_audio()) sender_task = asyncio.create_task(send_audio())
try: try:
# 1. Wait for speech to start (detection_timeout) # 1. Wait for speech to start (detection_timeout)
if detection_timeout: if detection_timeout:
try: try:
await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout) await asyncio.wait_for(
speech_started_event.wait(), timeout=detection_timeout
)
except asyncio.TimeoutError: except asyncio.TimeoutError:
# print("Detection timeout - no speech") # print("Detection timeout - no speech")
stop_event.set() stop_event.set()
# 2. If started (or no detection timeout), wait for completion # 2. If started (or no detection timeout), wait for completion
if not stop_event.is_set(): if not stop_event.is_set():
await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds) await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds)
except asyncio.TimeoutError: except asyncio.TimeoutError:
# print("Global timeout") # print("Global timeout")
pass pass
stop_event.set() stop_event.set()
await sender_task await sender_task
# Finish is synchronous # Finish is synchronous
dg_connection.finish() dg_connection.finish()
return self.transcript return self.transcript
def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str: def listen(
self,
timeout_seconds: float = 7.0,
detection_timeout: float = None,
lang: str = "ru",
) -> str:
""" """
Listen to microphone and transcribe speech. Listen to microphone and transcribe speech.
""" """
if not self.dg_client: if not self.dg_client:
self.initialize() self.initialize()
self.current_lang = lang self.current_lang = lang
print(f"🎙️ Слушаю ({lang})...") print(f"🎙️ Слушаю ({lang})...")
@@ -172,16 +184,18 @@ class SpeechRecognizer:
dg_connection = self.dg_client.listen.live.v("1") dg_connection = self.dg_client.listen.live.v("1")
try: try:
transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout)) transcript = asyncio.run(
self._process_audio(dg_connection, timeout_seconds, detection_timeout)
)
final_text = transcript.strip() if transcript else "" final_text = transcript.strip() if transcript else ""
if final_text: if final_text:
print(f"📝 Распознано: {final_text}") print(f"📝 Распознано: {final_text}")
else: else:
print("⚠️ Речь не распознана") print("⚠️ Речь не распознана")
return final_text return final_text
except Exception as e: except Exception as e:
print(f"❌ Ошибка STT: {e}") print(f"❌ Ошибка STT: {e}")
return "" return ""
@@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer:
return _recognizer return _recognizer
def listen(
    timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru"
) -> str:
    """
    Capture speech from the microphone and return its transcription.

    Thin module-level convenience wrapper: all arguments are forwarded
    unchanged, in the same order, to the shared recognizer's ``listen``.

    Args:
        timeout_seconds: Upper bound, in seconds, on the listening phase.
        detection_timeout: Optional limit, in seconds, on waiting for speech
            to begin; ``None`` disables that separate wait.
        lang: Recognition language code ("ru" by default).

    Returns:
        Whatever the shared recognizer's ``listen`` returns (the transcript).
    """
    recognizer = get_recognizer()
    return recognizer.listen(timeout_seconds, detection_timeout, lang)
@@ -218,4 +234,4 @@ def cleanup():
global _recognizer global _recognizer
if _recognizer: if _recognizer:
_recognizer.cleanup() _recognizer.cleanup()
_recognizer = None _recognizer = None

70
tts.py
View File

@@ -11,7 +11,7 @@ import threading
import time import time
import warnings import warnings
import re import re
from config import TTS_SPEAKER, TTS_SAMPLE_RATE from config import TTS_SPEAKER, TTS_EN_SPEAKER, TTS_SAMPLE_RATE
# Suppress Silero TTS warning about text length # Suppress Silero TTS warning about text length
warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols") warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols")
@@ -21,27 +21,55 @@ class TextToSpeech:
"""Text-to-Speech using Silero TTS with wake word interruption support.""" """Text-to-Speech using Silero TTS with wake word interruption support."""
def __init__(self):
    # Playback interruption state.
    # NOTE(review): presumably consumed by the playback loop, which is not
    # visible in this part of the file — confirm.
    self._stop_flag = threading.Event()
    self._interrupted = False

    # Sample rate passed to the model when synthesizing audio.
    self.sample_rate = TTS_SAMPLE_RATE

    # Configured voice per language code.
    self.speakers = dict(ru=TTS_SPEAKER, en=TTS_EN_SPEAKER)

    # Lazily filled cache of loaded models, keyed by language code.
    self.models = {}
def _load_model(self, language: str):
    """
    Return the Silero TTS model for ``language``, loading it on first use.

    Loaded models are kept in ``self.models`` so each language is fetched
    through ``torch.hub`` at most once per instance.

    Args:
        language: Language code; only "ru" and "en" are configured.

    Returns:
        The loaded (or previously cached) model object.

    Raises:
        ValueError: If ``language`` has no model configuration.
    """
    # Fast path: the model was already loaded earlier.
    try:
        return self.models[language]
    except KeyError:
        pass

    hub_settings = {
        "ru": {"language": "ru", "model_id": "v5_ru"},
        "en": {"language": "en", "model_id": "v3_en"},
    }
    settings = hub_settings.get(language)
    if settings is None:
        raise ValueError(f"Unsupported TTS language: {language}")

    print(f"📦 Загрузка модели Silero TTS ({language})...")

    cpu = torch.device("cpu")
    # silero_tts takes the model id through its ``speaker`` argument.
    loaded, _ = torch.hub.load(
        repo_or_dir="snakers4/silero-models",
        model="silero_tts",
        language=settings["language"],
        speaker=settings["model_id"],
    )
    loaded.to(cpu)

    self.models[language] = loaded
    return loaded
def _get_speaker(self, language: str, model) -> str:
    """
    Pick the voice to use with ``model`` for the given language.

    The configured voice from ``self.speakers`` is returned when the model
    exposes no ``speakers`` attribute or lists that voice. Otherwise a
    warning is printed and the model's first listed voice is used; if the
    model lists no voices at all, the configured voice is returned as-is.

    Args:
        language: Language code used to look up the configured voice.
        model: Loaded TTS model, optionally exposing ``speakers``.

    Returns:
        Name of the voice to pass to synthesis.
    """
    speaker = self.speakers.get(language)

    # Models without a voice list cannot be validated against.
    if not hasattr(model, "speakers"):
        return speaker

    if speaker in model.speakers:
        return speaker

    if model.speakers:
        fallback = model.speakers[0]
    else:
        fallback = speaker
    print(f"⚠️ Голос '{speaker}' недоступен, использую '{fallback}'")
    return fallback
def initialize(self):
    """
    Load the default (Russian) TTS model.

    Delegates to ``_load_model``; the loaded model itself is not returned.
    """
    self._load_model(language="ru")
def _split_text(self, text: str, max_length: int = 900) -> list[str]: def _split_text(self, text: str, max_length: int = 900) -> list[str]:
"""Split text into chunks smaller than max_length.""" """Split text into chunks smaller than max_length."""
@@ -83,13 +111,14 @@ class TextToSpeech:
# Filter empty chunks # Filter empty chunks
return [c for c in chunks if c] return [c for c in chunks if c]
def speak(self, text: str, check_interrupt=None) -> bool: def speak(self, text: str, check_interrupt=None, language: str = "ru") -> bool:
""" """
Convert text to speech and play it. Convert text to speech and play it.
Args: Args:
text: Text to synthesize and speak text: Text to synthesize and speak
check_interrupt: Optional callback function that returns True if playback should stop check_interrupt: Optional callback function that returns True if playback should stop
language: Language code for voice selection ("ru" or "en")
Returns: Returns:
True if playback completed normally, False if interrupted True if playback completed normally, False if interrupted
@@ -97,8 +126,8 @@ class TextToSpeech:
if not text.strip(): if not text.strip():
return True return True
if not self.model: model = self._load_model(language)
self.initialize() speaker = self._get_speaker(language, model)
# Split text into manageable chunks # Split text into manageable chunks
chunks = self._split_text(text) chunks = self._split_text(text)
@@ -120,8 +149,8 @@ class TextToSpeech:
try: try:
# Generate audio for chunk # Generate audio for chunk
audio = self.model.apply_tts( audio = model.apply_tts(
text=chunk, speaker=self.speaker, sample_rate=self.sample_rate text=chunk, speaker=speaker, sample_rate=self.sample_rate
) )
# Convert to numpy array # Convert to numpy array
@@ -218,18 +247,19 @@ def get_tts() -> TextToSpeech:
return _tts return _tts
def speak(text: str, check_interrupt=None, language: str = "ru") -> bool:
    """
    Synthesize ``text`` and play it through the shared TTS instance.

    Args:
        text: Text to speak.
        check_interrupt: Optional callback polled for interruption; playback
            stops when it returns True.
        language: Language code for voice selection ("ru" or "en").

    Returns:
        True if playback completed normally, False if it was interrupted.
    """
    engine = get_tts()
    return engine.speak(text, check_interrupt, language)
def was_interrupted() -> bool: def was_interrupted() -> bool:

View File

@@ -15,6 +15,7 @@ class WakeWordDetector:
self.porcupine = None self.porcupine = None
self.audio_stream = None self.audio_stream = None
self.pa = None self.pa = None
self._stream_closed = True # Track state explicitly
def initialize(self): def initialize(self):
"""Initialize Porcupine and audio stream.""" """Initialize Porcupine and audio stream."""
@@ -24,6 +25,19 @@ class WakeWordDetector:
) )
self.pa = pyaudio.PyAudio() self.pa = pyaudio.PyAudio()
self._open_stream()
print("🎤 Ожидание wake word 'Alexandr'...")
def _open_stream(self):
"""Open the audio stream."""
if self.audio_stream and not self._stream_closed:
return
if self.audio_stream:
try:
self.audio_stream.close()
except: pass
self.audio_stream = self.pa.open( self.audio_stream = self.pa.open(
rate=self.porcupine.sample_rate, rate=self.porcupine.sample_rate,
channels=1, channels=1,
@@ -31,44 +45,47 @@ class WakeWordDetector:
input=True, input=True,
frames_per_buffer=self.porcupine.frame_length frames_per_buffer=self.porcupine.frame_length
) )
print("🎤 Ожидание wake word 'Alexandr'...") self._stream_closed = False
def stop_monitoring(self):
    """
    Stop and close the wake-word audio stream, if one is open.

    Safe to call repeatedly: when no stream exists or it is already marked
    closed, only the closed flag is (re)asserted. Teardown is best-effort
    and never raises for ordinary stream errors.
    """
    stream = self.audio_stream
    if stream and not self._stream_closed:
        # The two steps are guarded separately so a failing stop_stream()
        # cannot skip close(); otherwise the device handle would leak while
        # the stream is still flagged as closed below.
        try:
            stream.stop_stream()
        except Exception:
            # Deliberately ignored (e.g. stream already stopped).
            # KeyboardInterrupt/SystemExit still propagate.
            pass
        try:
            stream.close()
        except Exception:
            # Nothing more can be done; a fresh stream is opened on the
            # next _open_stream() call.
            pass
    self._stream_closed = True
def wait_for_wakeword(self, timeout: float = None) -> bool:
    """
    Block until the wake word is detected or the timeout expires.

    Args:
        timeout: Maximum number of seconds to wait. ``None`` waits
            indefinitely; ``0`` (or a negative value) returns False
            immediately without reading any audio.

    Returns:
        True if the wake word was detected (the stream is then closed via
        ``stop_monitoring``), False if the timeout expired (the stream is
        left open).
    """
    import time  # kept local: the module's import list is not part of this block

    if not self.porcupine:
        self.initialize()

    # Ensure the stream is open; an earlier detection may have closed it.
    self._open_stream()

    frame_length = self.porcupine.frame_length
    frame_format = "h" * frame_length  # loop-invariant unpack format

    # A monotonic deadline is immune to wall-clock adjustments, and the
    # explicit None check keeps timeout=0 from meaning "wait forever".
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        if deadline is not None and time.monotonic() >= deadline:
            return False

        raw = self.audio_stream.read(frame_length, exception_on_overflow=False)
        samples = struct.unpack_from(frame_format, raw)

        if self.porcupine.process(samples) >= 0:
            print("✅ Wake word обнаружен!")
            # Stop and close the stream to release the microphone for STT.
            self.stop_monitoring()
            return True
def check_wakeword_once(self) -> bool: def check_wakeword_once(self) -> bool:
@@ -80,20 +97,8 @@ class WakeWordDetector:
self.initialize() self.initialize()
try: try:
# Ensure stream is open/active # Ensure stream is open
if self.audio_stream is None or not self.audio_stream.is_active(): self._open_stream()
# Re-open if needed (similar to wait_for_wakeword logic)
if self.audio_stream:
try:
self.audio_stream.close()
except: pass
self.audio_stream = self.pa.open(
rate=self.porcupine.sample_rate,
channels=1,
format=pyaudio.paInt16,
input=True,
frames_per_buffer=self.porcupine.frame_length
)
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False) pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm) pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
@@ -108,8 +113,7 @@ class WakeWordDetector:
def cleanup(self): def cleanup(self):
"""Release resources.""" """Release resources."""
if self.audio_stream: self.stop_monitoring()
self.audio_stream.close()
if self.pa: if self.pa:
self.pa.terminate() self.pa.terminate()
if self.porcupine: if self.porcupine:
@@ -128,10 +132,14 @@ def get_detector() -> WakeWordDetector:
return _detector return _detector
def wait_for_wakeword(timeout: float = None) -> bool:
    """
    Wait for wake word detection using the shared detector.

    Args:
        timeout: Maximum seconds to wait; ``None`` waits indefinitely.

    Returns:
        True if the wake word was detected, False if the wait timed out.
    """
    detector = get_detector()
    return detector.wait_for_wakeword(timeout)
def stop_monitoring():
    """
    Stop wake-word monitoring on the shared detector.

    Does nothing when no detector has been created yet, so calling this
    never instantiates one as a side effect.
    """
    detector = _detector
    if not detector:
        return
    detector.stop_monitoring()
def cleanup(): def cleanup():
"""Cleanup detector resources.""" """Cleanup detector resources."""