def translate_text(text: str, source_lang: str, target_lang: str) -> str:
    """
    Translate text using Perplexity AI.

    Args:
        text: Text to translate
        source_lang: Source language code ("ru" or "en"); any other value is
            passed to the model unchanged
        target_lang: Target language code ("ru" or "en"); any other value is
            passed to the model unchanged

    Returns:
        Translated text, or a Russian user-facing error message when there is
        nothing to translate or the request/response handling fails
    """
    # Whitespace-only input carries nothing to translate, so do not spend an
    # API call on it.
    if not text or not text.strip():
        return "Извините, я не расслышал текст для перевода."

    text = text.strip()

    lang_names = {"ru": "Russian", "en": "English"}
    source_name = lang_names.get(source_lang, source_lang)
    target_name = lang_names.get(target_lang, target_lang)

    # Only show an ellipsis when the preview is actually truncated.
    preview = text if len(text) <= 60 else f"{text[:60]}..."
    print(f"🌍 Перевод: {source_name} -> {target_name}: {preview}")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    messages = [
        {
            "role": "system",
            "content": TRANSLATION_SYSTEM_PROMPT.format(
                source=source_name, target=target_name
            ),
        },
        {"role": "user", "content": text},
    ]

    payload = {
        "model": PERPLEXITY_MODEL,
        "messages": messages,
        "max_tokens": 400,
        # Low temperature keeps the translation close to the source text.
        "temperature": 0.2,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()

        data = response.json()
        translated = data["choices"][0]["message"]["content"]

        # A null or blank completion is a malformed answer, not a translation.
        if not isinstance(translated, str) or not translated.strip():
            print("❌ Ошибка парсинга ответа перевода: пустой ответ")
            return "Не удалось обработать перевод."

        return translated.strip()

    except requests.exceptions.Timeout:
        return "Извините, сервер не отвечает. Попробуйте позже."
    except requests.exceptions.RequestException as e:
        print(f"❌ Ошибка API перевода: {e}")
        return "Произошла ошибка при переводе. Попробуйте ещё раз."
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # TypeError: unexpected JSON shape (e.g. a list where a dict was
        # expected). ValueError: a body that is not JSON on requests versions
        # whose decode error is not a RequestException.
        print(f"❌ Ошибка парсинга ответа перевода: {e}")
        return "Не удалось обработать перевод."
+""" +import json +import time +import subprocess +import re +import threading +from datetime import datetime +from pathlib import Path +from config import BASE_DIR +from local_stt import listen_for_keywords + +ALARM_FILE = BASE_DIR / "alarms.json" +ALARM_SOUND = BASE_DIR / "Apex-1.mp3" + +class AlarmClock: + def __init__(self): + self.alarms = [] + self.load_alarms() + + def load_alarms(self): + """Load alarms from JSON file.""" + if ALARM_FILE.exists(): + try: + with open(ALARM_FILE, "r", encoding="utf-8") as f: + self.alarms = json.load(f) + except Exception as e: + print(f"❌ Ошибка загрузки будильников: {e}") + self.alarms = [] + + def save_alarms(self): + """Save alarms to JSON file.""" + try: + with open(ALARM_FILE, "w", encoding="utf-8") as f: + json.dump(self.alarms, f, indent=4) + except Exception as e: + print(f"❌ Ошибка сохранения будильников: {e}") + + def add_alarm(self, hour: int, minute: int): + """Add a new alarm.""" + # Check if already exists + for alarm in self.alarms: + if alarm["hour"] == hour and alarm["minute"] == minute: + alarm["active"] = True + self.save_alarms() + return + + self.alarms.append({ + "hour": hour, + "minute": minute, + "active": True + }) + self.save_alarms() + print(f"⏰ Будильник установлен на {hour:02d}:{minute:02d}") + + def cancel_all_alarms(self): + """Cancel all active alarms.""" + for alarm in self.alarms: + alarm["active"] = False + self.save_alarms() + print("🔕 Все будильники отменены.") + + def check_alarms(self): + """Check if any alarm should trigger now. Returns True if triggered.""" + now = datetime.now() + triggered = False + + for alarm in self.alarms: + if alarm["active"]: + if alarm["hour"] == now.hour and alarm["minute"] == now.minute: + # Prevent re-triggering within the same minute? + # We should disable it immediately or track last trigger time. + # For simple logic: disable it (one-time alarm). + + # But wait, checking every second? + # If I disable it, it won't ring for the whole minute. + # Correct. 
+ print(f"⏰ ВРЕМЯ БУДИЛЬНИКА: {alarm['hour']:02d}:{alarm['minute']:02d}") + alarm["active"] = False + triggered = True + self.trigger_alarm() + break # Trigger one at a time + + if triggered: + self.save_alarms() + return True + return False + + def trigger_alarm(self): + """Play alarm sound and wait for stop command.""" + print("🔔 БУДИЛЬНИК ЗВОНИТ! (Скажите 'Стоп' или 'Александр стоп')") + + # Start playing sound in loop + # -q for quiet (no output) + # --loop -1 for infinite loop + cmd = ["mpg123", "-q", "--loop", "-1", str(ALARM_SOUND)] + + try: + process = subprocess.Popen(cmd) + except FileNotFoundError: + print("❌ Ошибка: mpg123 не найден. Установите его: sudo apt install mpg123") + return + + try: + # Listen for stop command using local Vosk + # Loop until stop word is heard + stop_words = ["стоп", "хватит", "тихо", "замолчи", "отмена", "александр стоп"] + + while True: + # Listen in short bursts to be responsive + text = listen_for_keywords(stop_words, timeout=3.0) + if text: + print(f"🛑 Будильник остановлен по команде: '{text}'") + break + + except Exception as e: + print(f"❌ Ошибка во время будильника: {e}") + finally: + # Kill the player + process.terminate() + try: + process.wait(timeout=1) + except subprocess.TimeoutExpired: + process.kill() + print("🔕 Будильник выключен.") + + def parse_command(self, text: str) -> str | None: + """ + Parse user text for alarm commands. + Returns response string if command handled, None otherwise. + """ + text = text.lower() + if "будильник" not in text and "разбуди" not in text: + return None + + if "отмени" in text: + self.cancel_all_alarms() + return "Хорошо, я отменил все будильники." + + # Regex to find time: HH:MM, HH-MM, HH MM, HH часов MM минут + # 1. "07:30", "7:30" + match = re.search(r'\b(\d{1,2})[:.-](\d{2})\b', text) + if match: + h, m = int(match.group(1)), int(match.group(2)) + if 0 <= h <= 23 and 0 <= m <= 59: + self.add_alarm(h, m) + return f"Я установил будильник на {h} часов {m} минут." + + # 2. 
"7 часов 30 минут" or "7 30" + # Search for pattern: digits ... (digits)? + # Complex to separate from other numbers. + + # Simple heuristics: + words = text.split() + nums = [int(s) for s in text.split() if s.isdigit()] + + # "на 7" -> 7:00 + if "на" in words or "в" in words: + # Try to find number after preposition + pass + + # Let's rely on explicit digit search if regex failed + # Patterns: "на 8", "на 8 30", "на 8 часов 30 минут", "на 8 часов" + + # Regex to capture hour and optional minute + # Matches: "на [часов] [M] [минут]" + match_time = re.search(r'на\s+(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?', text) + + if match_time: + h = int(match_time.group(1)) + m = int(match_time.group(2)) if match_time.group(2) else 0 + + # Handle AM/PM if specified + if "вечера" in text and h < 12: + h += 12 + elif "утра" in text and h == 12: + h = 0 + + if 0 <= h <= 23 and 0 <= m <= 59: + self.add_alarm(h, m) + return f"Хорошо, разбужу вас в {h}:{m:02d}." + + return "Я не понял время для будильника. Пожалуйста, скажите точное время, например 'семь тридцать'." + +# Global instance +_alarm_clock = None + +def get_alarm_clock(): + global _alarm_clock + if _alarm_clock is None: + _alarm_clock = AlarmClock() + return _alarm_clock diff --git a/alarms.json b/alarms.json new file mode 100644 index 0000000..c6998bd --- /dev/null +++ b/alarms.json @@ -0,0 +1,12 @@ +[ + { + "hour": 10, + "minute": 15, + "active": true + }, + { + "hour": 3, + "minute": 42, + "active": false + } +] \ No newline at end of file diff --git a/cleaner.py b/cleaner.py index dfc2f29..f338bd3 100644 --- a/cleaner.py +++ b/cleaner.py @@ -3,6 +3,7 @@ Response cleaner module. Removes markdown formatting and special characters from AI responses. Handles complex number-to-text conversion for Russian language. 
""" + import re import pymorphy3 from num2words import num2words @@ -12,79 +13,86 @@ morph = pymorphy3.MorphAnalyzer() # Preposition to case mapping (simplified heuristics) PREPOSITION_CASES = { - 'в': 'loct', # Prepositional (Locative 2) or Accusative. 'v godu' -> loct - 'во': 'loct', - 'на': 'accs', # Dates: 'na 5 maya' -> Accusative (na pyatoe) - 'о': 'loct', - 'об': 'loct', - 'обо': 'loct', - 'при': 'loct', - 'у': 'gent', - 'от': 'gent', - 'до': 'gent', - 'из': 'gent', - 'с': 'gent', # or ablt (instrumental) - 'со': 'gent', - 'без': 'gent', - 'для': 'gent', - 'вокруг': 'gent', - 'после': 'gent', - 'к': 'datv', - 'ко': 'datv', - 'по': 'datv', # or accs for dates (limit). Heuristic: datv defaults usually. - 'над': 'ablt', - 'под': 'ablt', - 'перед': 'ablt', - 'за': 'ablt', # or acc - 'между': 'ablt', + "в": "loct", # Prepositional (Locative 2) or Accusative. 'v godu' -> loct + "во": "loct", + "на": "accs", # Dates: 'na 5 maya' -> Accusative (na pyatoe) + "о": "loct", + "об": "loct", + "обо": "loct", + "при": "loct", + "у": "gent", + "от": "gent", + "до": "gent", + "из": "gent", + "с": "gent", # or ablt (instrumental) + "со": "gent", + "без": "gent", + "для": "gent", + "вокруг": "gent", + "после": "gent", + "к": "datv", + "ко": "datv", + "по": "datv", # or accs for dates (limit). Heuristic: datv defaults usually. 
def get_case_from_preposition(prep_token):
    """Look up the pymorphy case governed by a preposition (None if unknown or empty)."""
    return PREPOSITION_CASES.get(prep_token.lower()) if prep_token else None


def convert_number(number_str, context_type="cardinal", case="nominative", gender="m"):
    """
    Spell a numeric string out in Russian words.

    Strings containing "." or "," are parsed as floats (comma treated as the
    decimal separator), everything else as an integer. If parsing or the
    conversion fails, the error is printed and the input is returned unchanged.
    """
    try:
        has_fraction = any(separator in number_str for separator in (".", ","))
        if has_fraction:
            value = float(number_str.replace(",", "."))
        else:
            value = int(number_str)

        spelled = num2words(
            value, lang="ru", to=context_type, case=case, gender=gender
        )
    except Exception as e:
        print(f"Error converting number {number_str}: {e}")
        return number_str

    return spelled
- month_regex = '|'.join(MONTHS_GENITIVE) - + month_regex = "|".join(MONTHS_GENITIVE) + def replace_date_match(match): prep = match.group(1) day_str = match.group(2) month_word = match.group(3) - + # Determine case # Default to Genitive ("25 июня" -> "двадцать пятого июня") - case = 'genitive' - + case = "genitive" + if prep: prep_clean = prep.strip().lower() # Specific overrides for dates - if prep_clean == 'на': - case = 'accusative' # на 5 мая -> на пятое - elif prep_clean == 'по': - case = 'accusative' # по 5 мая -> по пятое (limit) - elif prep_clean == 'к': - case = 'dative' # к 5 мая -> к пятому - elif prep_clean in ['с', 'до', 'от']: - case = 'genitive' # с 5 мая -> с пятого + if prep_clean == "на": + case = "accusative" # на 5 мая -> на пятое + elif prep_clean == "по": + case = "accusative" # по 5 мая -> по пятое (limit) + elif prep_clean == "к": + case = "dative" # к 5 мая -> к пятому + elif prep_clean in ["с", "до", "от"]: + case = "genitive" # с 5 мая -> с пятого else: # Fallback to general preposition map morph_case = get_case_from_preposition(prep_clean) if morph_case: - case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive') + case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "genitive") # Convert to Ordinal # Dates are neuter ("число" implies neuter: "пятое", "пятого") @@ -156,112 +170,119 @@ def numbers_to_words(text: str) -> str: # 5, ordinal, genitive -> "пятого" (masc/neut are same) # 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?) # Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому). 
- - words = convert_number(day_str, context_type='ordinal', case=case, gender='n') - + + words = convert_number(day_str, context_type="ordinal", case=case, gender="n") + prefix = f"{prep} " if prep else "" return f"{prefix}{words} {month_word}" text = re.sub( - r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b', + r"(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(" + month_regex + r")\b", replace_date_match, - text + text, ) # 3. Handle remaining numbers (Cardinals) def replace_cardinal_match(match): prep = match.group(1) num_str = match.group(2) - - case = 'nominative' + + case = "nominative" if prep: morph_case = get_case_from_preposition(prep.strip()) if morph_case: - case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative') - - words = convert_number(num_str, context_type='cardinal', case=case) - + case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "nominative") + + words = convert_number(num_str, context_type="cardinal", case=case) + prefix = f"{prep} " if prep else "" return f"{prefix}{words}" text = re.sub( - r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b', + r"(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b", replace_cardinal_match, - text + text, ) - + return text -def clean_response(text: str) -> str: +def clean_response(text: str, language: str = "ru") -> str: """ Clean AI response from markdown formatting and special characters. - + Args: text: Raw AI response with possible markdown - + language: Target language for output (affects post-processing) + Returns: Clean text suitable for TTS """ if not text: return "" - + # Remove citation references like [1], [2], [citation], etc. 
# Using hex escapes for brackets to avoid escaping issues - text = re.sub(r'\x5B\d+\x5D', '', text) - text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE) - text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE) - + text = re.sub(r"\x5B\d+\x5D", "", text) + text = re.sub(r"\x5Bcitation\s*needed\x5D", "", text, flags=re.IGNORECASE) + text = re.sub(r"\x5Bsource\x5D", "", text, flags=re.IGNORECASE) + # Remove markdown bold **text** and __text__ - text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) - text = re.sub(r'__(.+?)__', r'\1', text) - + text = re.sub(r"\*\*(.+?)\*\*", r"\1", text) + text = re.sub(r"__(.+?)__", r"\1", text) + # Remove markdown italic *text* and _text_ - text = re.sub(r'\*(.+?)\*', r'\1', text) - text = re.sub(r'(? text - text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text) - + text = re.sub(r"\x5B([^\x5D]+)\x5D\([^)]+\)", r"\1", text) + # Remove markdown images ![alt](url) - text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text) - + text = re.sub(r"!\x5B([^\x5D]*)\x5D\([^)]+\)", "", text) + # Remove inline code `code` - text = re.sub(r'`([^`]+)`', r'\1', text) - + text = re.sub(r"`([^`]+)`", r"\1", text) + # Remove code blocks ```code``` - text = re.sub(r'```[\s\S]*?```', '', text) - + text = re.sub(r"```[\s\S]*?```", "", text) + # Remove markdown list markers (-, *, +, numbered) - text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE) - text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE) - + text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE) + text = re.sub(r"^\s*\d+\.\s+", "", text, flags=re.MULTILINE) + # Remove blockquotes - text = re.sub(r'^\s*>\s*', '', text, flags=re.MULTILINE) - + text = re.sub(r"^\s*>\s*", "", text, flags=re.MULTILINE) + # Remove horizontal rules - text = re.sub(r'^[-*_]{3,}\s*$', '', text, flags=re.MULTILINE) - + text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE) + # Remove HTML tags if any - text = re.sub(r'<[^>]+>', '', text) + text = 
re.sub(r"<[^>]+>", "", text) # Remove informal slang greetings at the beginning of sentences/responses - text = re.sub(r'^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)[,!?:]?\s*', '', text, flags=re.IGNORECASE | re.MULTILINE) - - # Convert numbers to words (Russian) - text = numbers_to_words(text) - + text = re.sub( + r"^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)[,!?:]?\s*", + "", + text, + flags=re.IGNORECASE | re.MULTILINE, + ) + + # Convert numbers to words only for Russian, and only if digits exist + if language == "ru" and re.search(r"\d", text): + text = numbers_to_words(text) + # Remove extra whitespace - text = re.sub(r'\n{3,}', '\n\n', text) - text = re.sub(r' +', ' ', text) - + text = re.sub(r"\n{3,}", "\n\n", text) + text = re.sub(r" +", " ", text) + # Clean up and return text = text.strip() - - return text \ No newline at end of file + + return text diff --git a/config.py b/config.py index 8c2c978..1b083f6 100644 --- a/config.py +++ b/config.py @@ -2,6 +2,7 @@ Configuration module for smart speaker. Loads environment variables from .env file. """ + import os from pathlib import Path from dotenv import load_dotenv @@ -31,6 +32,13 @@ VOSK_MODEL_PATH = BASE_DIR / "vosk-model-ru-0.42" SAMPLE_RATE = 16000 CHANNELS = 1 +# Set timezone to Moscow +import time + +os.environ["TZ"] = "Europe/Moscow" +time.tzset() + # TTS configuration -TTS_SPEAKER = "eugene" # Available: aidar, baya, kseniya, xenia, eugene +TTS_SPEAKER = "eugene" # Available (ru): aidar, baya, kseniya, xenia, eugene +TTS_EN_SPEAKER = os.getenv("TTS_EN_SPEAKER", "en_0") TTS_SAMPLE_RATE = 48000 diff --git a/local_stt.py b/local_stt.py new file mode 100644 index 0000000..f53d648 --- /dev/null +++ b/local_stt.py @@ -0,0 +1,116 @@ +""" +Local offline Speech-to-Text module using Vosk. +Used for simple command detection (like "stop") without internet. 
class LocalRecognizer:
    """Offline keyword spotter built on a Vosk model and a PyAudio microphone stream."""

    def __init__(self):
        self.model = None
        self.rec = None
        self.pa = None
        self.stream = None

    @staticmethod
    def _load_model_quietly():
        """Load the Vosk model with fd 2 (stderr) pointed at the null device.

        stderr is always restored, even when loading fails, so later error
        output is not lost and the saved descriptor is not leaked.
        """
        sys.stderr.flush()
        saved_stderr = os.dup(2)
        try:
            null_fd = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(null_fd, 2)
            finally:
                os.close(null_fd)
            return Model(str(VOSK_MODEL_PATH))
        finally:
            os.dup2(saved_stderr, 2)
            os.close(saved_stderr)

    @staticmethod
    def _contains_keyword(text: str, keywords: list) -> bool:
        """Return True if any keyword occurs as a substring of *text*."""
        return bool(text) and any(kw in text for kw in keywords)

    def initialize(self):
        """
        Load the model (once) and create the recognizer and audio backend.

        Returns:
            True on success, False if the model is missing or setup failed
        """
        if not os.path.exists(VOSK_MODEL_PATH):
            print(f"❌ Ошибка: Vosk модель не найдена по пути {VOSK_MODEL_PATH}")
            return False

        print("📦 Инициализация локального STT (Vosk)...")
        try:
            if self.model is None:
                self.model = self._load_model_quietly()
            self.rec = KaldiRecognizer(self.model, SAMPLE_RATE)
            if self.pa is None:
                self.pa = pyaudio.PyAudio()
        except Exception as e:
            print(f"Error initializing Vosk: {e}")
            self.rec = None
            return False

        return True

    def listen_for_keywords(self, keywords: list, timeout: float = 10.0) -> str:
        """
        Listen for specific keywords locally.

        Args:
            keywords: Phrases to look for (substring match on recognized text)
            timeout: Maximum listening time in seconds

        Returns:
            The recognized (final or partial) text containing a keyword,
            or empty string if nothing matched or the microphone is unavailable.
        """
        import time  # local import: not imported at module level in this file

        if not (self.model and self.rec and self.pa):
            if not self.initialize():
                return ""

        # Drop audio state left by a previous call; otherwise a stale partial
        # result could match a keyword before anything new is said.
        self.rec.Reset()

        stream = None
        try:
            stream = self.pa.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=SAMPLE_RATE,
                input=True,
                frames_per_buffer=4096,
            )
            stream.start_stream()
        except Exception as e:
            if stream is not None:
                stream.close()
            print(f"❌ Ошибка микрофона: {e}")
            return ""

        print(f"👂 Локальное слушание ожидает: {keywords}")

        # Monotonic clock: the timeout must not depend on wall-clock changes.
        deadline = time.monotonic() + timeout

        try:
            while time.monotonic() < deadline:
                data = stream.read(4096, exception_on_overflow=False)
                if self.rec.AcceptWaveform(data):
                    # Utterance finished: final result
                    text = json.loads(self.rec.Result()).get("text", "")
                    if text:
                        print(f"📝 Локально: {text}")
                else:
                    # Utterance still in progress: partial result
                    text = json.loads(self.rec.PartialResult()).get("partial", "")

                if self._contains_keyword(text, keywords):
                    return text
        finally:
            stream.stop_stream()
            stream.close()

        return ""

    def cleanup(self):
        """Release the audio backend; it is re-created on the next listen."""
        if self.pa:
            self.pa.terminate()
            self.pa = None


# Global instance
_local_recognizer = None


def get_local_recognizer():
    """Return the process-wide LocalRecognizer, creating it on first use."""
    global _local_recognizer
    if _local_recognizer is None:
        _local_recognizer = LocalRecognizer()
    return _local_recognizer


def listen_for_keywords(keywords: list, timeout: float = 5.0) -> str:
    """Listen for keywords using Vosk."""
    return get_local_recognizer().listen_for_keywords(keywords, timeout)
def parse_translation_request(text: str):
    """
    Detect translation commands and extract language direction and text.

    The command must open the phrase (leading whitespace is ignored). An
    optional "язык"/"языка" after the language name and any separator
    punctuation before the payload are not part of the extracted text.

    Args:
        text: Recognized user utterance

    Returns:
        dict with source_lang, target_lang, text (text may be empty when only
        the command was said) or None if this is not a translation command
    """
    if not text:
        return None

    phrase = text.strip()

    # (command prefix, source language, target language)
    triggers = (
        (r"переведи на английский(?:\s+язык)?", "ru", "en"),
        (r"переведи на русский(?:\s+язык)?", "en", "ru"),
        (r"переведи с английского(?:\s+языка)?", "en", "ru"),
        (r"переведи с русского(?:\s+языка)?", "ru", "en"),
        (r"как по[-\s]?английски", "ru", "en"),
        (r"как по[-\s]?русски", "en", "ru"),
        (r"translate (?:to|into) english", "ru", "en"),
        (r"translate (?:to|into) russian", "en", "ru"),
        (r"translate from english", "en", "ru"),
        (r"translate from russian", "ru", "en"),
    )

    for trigger, source_lang, target_lang in triggers:
        # \b: the command must end on a word boundary; the character class
        # swallows separators such as ": " or " - " before the payload.
        match = re.match(
            trigger + r"\b[\s:,\-—–]*(.*)$", phrase, flags=re.IGNORECASE
        )
        if match:
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": match.group(1).strip(),
            }
    return None
tts_thread.start() + stt_thread.join() + tts_thread.join() + + if init_errors: + raise init_errors[0] + + alarm_clock = get_alarm_clock() # Initialize Alarm Clock print() # Initialize chat history (last 10 exchanges = 20 messages) @@ -57,37 +119,58 @@ def main(): skip_wakeword = False while True: try: + # Ensure wake word detector stream is closed before listening + stop_wakeword_monitoring() + + # Check for alarms every loop iteration + if alarm_clock.check_alarms(): + # If alarm triggered and finished (user stopped it), we continue loop + # The alarm.trigger_alarm() blocks until stopped. + skip_wakeword = False # Reset state after alarm + continue + # Step 1: Wait for wake word or Follow-up listen if not skip_wakeword: - wait_for_wakeword() + # Wait with timeout to allow alarm checking + detected = wait_for_wakeword(timeout=1.0) + + # If timeout (not detected), loop again to check alarms + if not detected: + continue + # Standard listen after activation user_text = listen(timeout_seconds=7.0) else: - # Follow-up listen (wait 2.0s for start, then listen long) - print("👂 Слушаю продолжение диалога...") - user_text = listen(timeout_seconds=20.0, detection_timeout=2.0) - + # Follow-up listen (wait 5.0s for start) + print("👂 Слушаю продолжение диалога (5 сек)...") + user_text = listen(timeout_seconds=10.0, detection_timeout=5.0) + if not user_text: - # User didn't continue conversation, go back to sleep + # User didn't continue conversation, go back to sleep silently skip_wakeword = False continue - # Reset flag for now (will be set to True if we speak successfully) - skip_wakeword = False - # Step 2: Check if speech was recognized if not user_text: + # If this was a direct wake word activation but no speech speak("Извините, я вас не расслышал. 
Попробуйте ещё раз.") + skip_wakeword = False # Reset to wake word continue # Check for stop commands user_text_lower = user_text.lower().strip() - if user_text_lower in ["стоп", "александр", "стоп александр"]: + if user_text_lower in ["стоп", "александр", "стоп александр", "хватит"]: print("_" * 50) print("💤 Жду 'Alexandr' для активации...") skip_wakeword = False continue + # Check for alarm commands + alarm_response = alarm_clock.parse_command(user_text) + if alarm_response: + speak(alarm_response) + continue + # Check for volume command if user_text.lower().startswith("громкость"): try: @@ -113,21 +196,67 @@ def main(): speak("Не удалось изменить громкость.") continue + # Check for translation commands + translation_request = parse_translation_request(user_text) + if translation_request: + source_lang = translation_request["source_lang"] + target_lang = translation_request["target_lang"] + text_to_translate = translation_request["text"] + + if not text_to_translate: + prompt = ( + "Скажи фразу на английском." + if source_lang == "en" + else "Скажи фразу на русском." 
+ ) + speak(prompt) + text_to_translate = listen( + timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang + ) + + if not text_to_translate: + speak("Я не расслышал текст для перевода.") + skip_wakeword = False + continue + + translated_text = translate_text( + text_to_translate, source_lang, target_lang + ) + clean_text = clean_response(translated_text, language=target_lang) + + completed = speak( + clean_text, + check_interrupt=check_wakeword_once, + language=target_lang, + ) + stop_wakeword_monitoring() + skip_wakeword = True + + if not completed: + print("⏹️ Перевод прерван - слушаю следующий вопрос") + continue + # Step 3: Send to AI # Add user message to history chat_history.append({"role": "user", "content": user_text}) - + # Get response using history ai_response = ask_ai(list(chat_history)) - + # Add AI response to history chat_history.append({"role": "assistant", "content": ai_response}) # Step 4: Clean response - clean_text = clean_response(ai_response) + clean_text = clean_response(ai_response, language="ru") # Step 5: Speak response (with wake word interrupt support) - completed = speak(clean_text, check_interrupt=check_wakeword_once) + # This uses check_wakeword_once which opens/closes stream as needed + completed = speak( + clean_text, check_interrupt=check_wakeword_once, language="ru" + ) + + # Stop monitoring after TTS finishes (cleanup stream opened by check_wakeword_once) + stop_wakeword_monitoring() # Enable follow-up mode for next iteration skip_wakeword = True @@ -136,7 +265,12 @@ def main(): # but we can print a message if not completed: print("⏹️ Ответ прерван - слушаю следующий вопрос") - continue + # If interrupted, we treat it as immediate follow up? + # Usually interruption means "I have a new command" + # So skip_wakeword = True is correct. + # But we might want to listen IMMEDIATELY without waiting 5s for start? + # listen() handles that. 
+ pass print() print("-" * 30) @@ -149,6 +283,7 @@ def main(): except Exception as e: print(f"❌ Ошибка: {e}") speak("Произошла ошибка. Попробуйте ещё раз.") + skip_wakeword = False if __name__ == "__main__": diff --git a/stt.py b/stt.py index b4a632a..90ce1dc 100644 --- a/stt.py +++ b/stt.py @@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API. Recognizes speech from microphone using streaming WebSocket. Supports Russian (default) and English. """ + import os import asyncio import threading @@ -20,6 +21,7 @@ from deepgram import ( # Configure logging to suppress debug noise logging.getLogger("deepgram").setLevel(logging.WARNING) + class SpeechRecognizer: """Speech recognizer using Deepgram streaming.""" @@ -29,18 +31,18 @@ class SpeechRecognizer: self.stream = None self.transcript = "" self.lock = threading.Lock() - + def initialize(self): """Initialize Deepgram client and PyAudio.""" if not DEEPGRAM_API_KEY: raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.") - + print("📦 Инициализация Deepgram STT...") config = DeepgramClientOptions( verbose=logging.WARNING, ) self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, config) - + self.pa = pyaudio.PyAudio() print("✅ Deepgram клиент готов") @@ -59,13 +61,14 @@ class SpeechRecognizer: async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout): """Async loop to send audio and wait for results.""" self.transcript = "" - + transcript_parts = [] + loop = asyncio.get_running_loop() stream = self._get_stream() - + stop_event = asyncio.Event() speech_started_event = asyncio.Event() - + # We need access to the outer 'self' (SpeechRecognizer instance) speech_recognizer_self = self @@ -74,9 +77,11 @@ class SpeechRecognizer: if len(sentence) == 0: return if result.is_final: - print(f"📝 Частичный результат: {sentence}") with speech_recognizer_self.lock: - speech_recognizer_self.transcript = sentence + transcript_parts.append(sentence) + speech_recognizer_self.transcript = " ".join( + 
transcript_parts + ).strip() def on_speech_started(unused_self, speech_started, **kwargs): loop.call_soon_threadsafe(speech_started_event.set) @@ -102,7 +107,7 @@ class SpeechRecognizer: channels=1, sample_rate=SAMPLE_RATE, interim_results=True, - utterance_end_ms="1200", + utterance_end_ms=1200, vad_events=True, ) @@ -133,38 +138,45 @@ class SpeechRecognizer: print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}") sender_task = asyncio.create_task(send_audio()) - + try: # 1. Wait for speech to start (detection_timeout) if detection_timeout: try: - await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout) + await asyncio.wait_for( + speech_started_event.wait(), timeout=detection_timeout + ) except asyncio.TimeoutError: # print("Detection timeout - no speech") stop_event.set() - + # 2. If started (or no detection timeout), wait for completion if not stop_event.is_set(): await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds) - + except asyncio.TimeoutError: # print("Global timeout") pass - + stop_event.set() await sender_task # Finish is synchronous dg_connection.finish() - + return self.transcript - def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str: + def listen( + self, + timeout_seconds: float = 7.0, + detection_timeout: float = None, + lang: str = "ru", + ) -> str: """ Listen to microphone and transcribe speech. 
""" if not self.dg_client: self.initialize() - + self.current_lang = lang print(f"🎙️ Слушаю ({lang})...") @@ -172,16 +184,18 @@ class SpeechRecognizer: dg_connection = self.dg_client.listen.live.v("1") try: - transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout)) - + transcript = asyncio.run( + self._process_audio(dg_connection, timeout_seconds, detection_timeout) + ) + final_text = transcript.strip() if transcript else "" if final_text: print(f"📝 Распознано: {final_text}") else: print("⚠️ Речь не распознана") - + return final_text - + except Exception as e: print(f"❌ Ошибка STT: {e}") return "" @@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer: return _recognizer -def listen(timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str: +def listen( + timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru" +) -> str: """Listen to microphone and return transcribed text.""" return get_recognizer().listen(timeout_seconds, detection_timeout, lang) @@ -218,4 +234,4 @@ def cleanup(): global _recognizer if _recognizer: _recognizer.cleanup() - _recognizer = None \ No newline at end of file + _recognizer = None diff --git a/tts.py b/tts.py index 3e37a2e..6a89276 100644 --- a/tts.py +++ b/tts.py @@ -11,7 +11,7 @@ import threading import time import warnings import re -from config import TTS_SPEAKER, TTS_SAMPLE_RATE +from config import TTS_SPEAKER, TTS_EN_SPEAKER, TTS_SAMPLE_RATE # Suppress Silero TTS warning about text length warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols") @@ -21,27 +21,55 @@ class TextToSpeech: """Text-to-Speech using Silero TTS with wake word interruption support.""" def __init__(self): - self.model = None + self.models = {} self.sample_rate = TTS_SAMPLE_RATE - self.speaker = TTS_SPEAKER + self.speakers = { + "ru": TTS_SPEAKER, + "en": TTS_EN_SPEAKER, + } self._interrupted = False self._stop_flag = threading.Event() - 
def initialize(self): - """Initialize Silero TTS model.""" - print("📦 Загрузка модели Silero TTS v5...") + def _load_model(self, language: str): + """Load and cache Silero TTS model for the given language.""" + if language in self.models: + return self.models[language] - # Load Silero TTS model - device = torch.device('cpu') - self.model, _ = torch.hub.load( + model_config = { + "ru": {"language": "ru", "model_id": "v5_ru"}, + "en": {"language": "en", "model_id": "v3_en"}, + } + + if language not in model_config: + raise ValueError(f"Unsupported TTS language: {language}") + + config = model_config[language] + print(f"📦 Загрузка модели Silero TTS ({language})...") + + device = torch.device("cpu") + model, _ = torch.hub.load( repo_or_dir="snakers4/silero-models", model="silero_tts", - language="ru", - speaker="v5_ru", + language=config["language"], + speaker=config["model_id"], ) - self.model.to(device) + model.to(device) - print(f"✅ Модель TTS v5 загружена (голос: {self.speaker})") + self.models[language] = model + return model + + def _get_speaker(self, language: str, model) -> str: + """Return a valid speaker for the loaded model.""" + speaker = self.speakers.get(language) + if hasattr(model, "speakers") and speaker not in model.speakers: + fallback = model.speakers[0] if model.speakers else speaker + print(f"⚠️ Голос '{speaker}' недоступен, использую '{fallback}'") + return fallback + return speaker + + def initialize(self): + """Initialize default (Russian) TTS model.""" + self._load_model("ru") def _split_text(self, text: str, max_length: int = 900) -> list[str]: """Split text into chunks smaller than max_length.""" @@ -83,13 +111,14 @@ class TextToSpeech: # Filter empty chunks return [c for c in chunks if c] - def speak(self, text: str, check_interrupt=None) -> bool: + def speak(self, text: str, check_interrupt=None, language: str = "ru") -> bool: """ Convert text to speech and play it. 
Args: text: Text to synthesize and speak check_interrupt: Optional callback function that returns True if playback should stop + language: Language code for voice selection ("ru" or "en") Returns: True if playback completed normally, False if interrupted @@ -97,8 +126,8 @@ class TextToSpeech: if not text.strip(): return True - if not self.model: - self.initialize() + model = self._load_model(language) + speaker = self._get_speaker(language, model) # Split text into manageable chunks chunks = self._split_text(text) @@ -120,8 +149,8 @@ class TextToSpeech: try: # Generate audio for chunk - audio = self.model.apply_tts( - text=chunk, speaker=self.speaker, sample_rate=self.sample_rate + audio = model.apply_tts( + text=chunk, speaker=speaker, sample_rate=self.sample_rate ) # Convert to numpy array @@ -218,18 +247,19 @@ def get_tts() -> TextToSpeech: return _tts -def speak(text: str, check_interrupt=None) -> bool: +def speak(text: str, check_interrupt=None, language: str = "ru") -> bool: """ Synthesize and speak the given text. 
Args: text: Text to speak check_interrupt: Optional callback for interrupt checking + language: Language code for voice selection ("ru" or "en") Returns: True if completed normally, False if interrupted """ - return get_tts().speak(text, check_interrupt) + return get_tts().speak(text, check_interrupt, language) def was_interrupted() -> bool: diff --git a/wakeword.py b/wakeword.py index 1c7f8be..2cf7b04 100644 --- a/wakeword.py +++ b/wakeword.py @@ -15,6 +15,7 @@ class WakeWordDetector: self.porcupine = None self.audio_stream = None self.pa = None + self._stream_closed = True # Track state explicitly def initialize(self): """Initialize Porcupine and audio stream.""" @@ -24,6 +25,19 @@ class WakeWordDetector: ) self.pa = pyaudio.PyAudio() + self._open_stream() + print("🎤 Ожидание wake word 'Alexandr'...") + + def _open_stream(self): + """Open the audio stream.""" + if self.audio_stream and not self._stream_closed: + return + + if self.audio_stream: + try: + self.audio_stream.close() + except: pass + self.audio_stream = self.pa.open( rate=self.porcupine.sample_rate, channels=1, @@ -31,44 +45,47 @@ class WakeWordDetector: input=True, frames_per_buffer=self.porcupine.frame_length ) - print("🎤 Ожидание wake word 'Alexandr'...") - - def wait_for_wakeword(self) -> bool: + self._stream_closed = False + + def stop_monitoring(self): + """Explicitly stop and close the stream.""" + if self.audio_stream and not self._stream_closed: + try: + self.audio_stream.stop_stream() + self.audio_stream.close() + except: pass + self._stream_closed = True + + def wait_for_wakeword(self, timeout: float = None) -> bool: """ - Blocks until wake word is detected. - Returns True when wake word is detected. + Blocks until wake word is detected or timeout expires. + + Args: + timeout: Maximum seconds to wait. None = infinite. + + Returns: + True if wake word detected, False if timeout. 
""" + import time if not self.porcupine: self.initialize() - # Ensure stream is open and active - if self.audio_stream is None or not self.audio_stream.is_active(): - # If closed or None, we might need to recreate it. - # PyAudio streams once closed cannot be reopened usually? - # We should probably recreate it. - if self.audio_stream: - try: - self.audio_stream.close() - except: pass - - self.audio_stream = self.pa.open( - rate=self.porcupine.sample_rate, - channels=1, - format=pyaudio.paInt16, - input=True, - frames_per_buffer=self.porcupine.frame_length - ) + # Ensure stream is open + self._open_stream() + + start_time = time.time() while True: + if timeout and (time.time() - start_time > timeout): + return False + pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False) pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm) keyword_index = self.porcupine.process(pcm) if keyword_index >= 0: print("✅ Wake word обнаружен!") - # Stop and CLOSE stream to release mic for STT - self.audio_stream.stop_stream() - self.audio_stream.close() + self.stop_monitoring() return True def check_wakeword_once(self) -> bool: @@ -80,20 +97,8 @@ class WakeWordDetector: self.initialize() try: - # Ensure stream is open/active - if self.audio_stream is None or not self.audio_stream.is_active(): - # Re-open if needed (similar to wait_for_wakeword logic) - if self.audio_stream: - try: - self.audio_stream.close() - except: pass - self.audio_stream = self.pa.open( - rate=self.porcupine.sample_rate, - channels=1, - format=pyaudio.paInt16, - input=True, - frames_per_buffer=self.porcupine.frame_length - ) + # Ensure stream is open + self._open_stream() pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False) pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm) @@ -108,8 +113,7 @@ class WakeWordDetector: def cleanup(self): """Release resources.""" - if self.audio_stream: - self.audio_stream.close() + 
self.stop_monitoring() if self.pa: self.pa.terminate() if self.porcupine: @@ -128,10 +132,14 @@ def get_detector() -> WakeWordDetector: return _detector -def wait_for_wakeword() -> bool: +def wait_for_wakeword(timeout: float = None) -> bool: """Wait for wake word detection.""" - return get_detector().wait_for_wakeword() + return get_detector().wait_for_wakeword(timeout) +def stop_monitoring(): + """Stop monitoring for wake word.""" + if _detector: + _detector.stop_monitoring() def cleanup(): """Cleanup detector resources."""