translator но без озвучивания слов на английском

This commit is contained in:
2026-01-09 01:01:27 +03:00
parent 53809c03f4
commit 242ead5355
11 changed files with 845 additions and 238 deletions

BIN
Apex-1.mp3 Normal file

Binary file not shown.

67
ai.py
View File

@@ -16,6 +16,10 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов.
ВАЖНО: Не используй в ответах панибратские или сленговые приветствия и обращения, такие как "Эй", "Хэй", "Слушай" в начале фразы и подобные."""
TRANSLATION_SYSTEM_PROMPT = """You are a translation engine.
Translate from {source} to {target}.
Return only the translated text, without quotes, comments, or explanations."""
def ask_ai(messages_history: list) -> str:
"""
@@ -72,3 +76,66 @@ def ask_ai(messages_history: list) -> str:
except (KeyError, IndexError) as e:
print(f"❌ Ошибка парсинга ответа: {e}")
return "Не удалось обработать ответ от AI."
def translate_text(text: str, source_lang: str, target_lang: str) -> str:
    """
    Translate text using Perplexity AI.

    Args:
        text: Text to translate. Empty or whitespace-only input is rejected
            without contacting the API.
        source_lang: Source language code ("ru" or "en"). Unknown codes are
            inserted into the prompt unchanged.
        target_lang: Target language code ("ru" or "en"). Unknown codes are
            inserted into the prompt unchanged.

    Returns:
        The translated text with surrounding whitespace removed, or a
        Russian-language fallback message when the input is empty, the
        request fails, or the response cannot be parsed.
    """
    # Whitespace-only input carries nothing to translate: bail out before
    # spending an API call on it.
    text = text.strip() if text else ""
    if not text:
        return "Извините, я не расслышал текст для перевода."

    lang_names = {"ru": "Russian", "en": "English"}
    source_name = lang_names.get(source_lang, source_lang)
    target_name = lang_names.get(target_lang, target_lang)
    print(f"🌍 Перевод: {source_name} -> {target_name}: {text[:60]}...")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    messages = [
        {
            "role": "system",
            "content": TRANSLATION_SYSTEM_PROMPT.format(
                source=source_name, target=target_name
            ),
        },
        {"role": "user", "content": text},
    ]
    payload = {
        "model": PERPLEXITY_MODEL,
        "messages": messages,
        "max_tokens": 400,
        # Low temperature keeps the translation close to literal.
        "temperature": 0.2,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()
        data = response.json()
        ai_response = data["choices"][0]["message"]["content"]
        return ai_response.strip()
    except requests.exceptions.Timeout:
        return "Извините, сервер не отвечает. Попробуйте позже."
    except requests.exceptions.RequestException as e:
        print(f"❌ Ошибка API перевода: {e}")
        return "Произошла ошибка при переводе. Попробуйте ещё раз."
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
        # TypeError/AttributeError: a field is null or has the wrong type.
        # ValueError: the body is not valid JSON (requests versions whose
        # JSON error does not derive from RequestException).
        print(f"❌ Ошибка парсинга ответа перевода: {e}")
        return "Не удалось обработать перевод."

194
alarm.py Normal file
View File

@@ -0,0 +1,194 @@
"""
Alarm clock module.
Handles alarm scheduling, persistence, and playback.
"""
import json
import time
import subprocess
import re
import threading
from datetime import datetime
from pathlib import Path
from config import BASE_DIR
from local_stt import listen_for_keywords
ALARM_FILE = BASE_DIR / "alarms.json"
ALARM_SOUND = BASE_DIR / "Apex-1.mp3"
class AlarmClock:
def __init__(self):
self.alarms = []
self.load_alarms()
def load_alarms(self):
"""Load alarms from JSON file."""
if ALARM_FILE.exists():
try:
with open(ALARM_FILE, "r", encoding="utf-8") as f:
self.alarms = json.load(f)
except Exception as e:
print(f"❌ Ошибка загрузки будильников: {e}")
self.alarms = []
def save_alarms(self):
"""Save alarms to JSON file."""
try:
with open(ALARM_FILE, "w", encoding="utf-8") as f:
json.dump(self.alarms, f, indent=4)
except Exception as e:
print(f"❌ Ошибка сохранения будильников: {e}")
def add_alarm(self, hour: int, minute: int):
"""Add a new alarm."""
# Check if already exists
for alarm in self.alarms:
if alarm["hour"] == hour and alarm["minute"] == minute:
alarm["active"] = True
self.save_alarms()
return
self.alarms.append({
"hour": hour,
"minute": minute,
"active": True
})
self.save_alarms()
print(f"⏰ Будильник установлен на {hour:02d}:{minute:02d}")
def cancel_all_alarms(self):
"""Cancel all active alarms."""
for alarm in self.alarms:
alarm["active"] = False
self.save_alarms()
print("🔕 Все будильники отменены.")
def check_alarms(self):
"""Check if any alarm should trigger now. Returns True if triggered."""
now = datetime.now()
triggered = False
for alarm in self.alarms:
if alarm["active"]:
if alarm["hour"] == now.hour and alarm["minute"] == now.minute:
# Prevent re-triggering within the same minute?
# We should disable it immediately or track last trigger time.
# For simple logic: disable it (one-time alarm).
# But wait, checking every second?
# If I disable it, it won't ring for the whole minute.
# Correct.
print(f"⏰ ВРЕМЯ БУДИЛЬНИКА: {alarm['hour']:02d}:{alarm['minute']:02d}")
alarm["active"] = False
triggered = True
self.trigger_alarm()
break # Trigger one at a time
if triggered:
self.save_alarms()
return True
return False
def trigger_alarm(self):
"""Play alarm sound and wait for stop command."""
print("🔔 БУДИЛЬНИК ЗВОНИТ! (Скажите 'Стоп' или 'Александр стоп')")
# Start playing sound in loop
# -q for quiet (no output)
# --loop -1 for infinite loop
cmd = ["mpg123", "-q", "--loop", "-1", str(ALARM_SOUND)]
try:
process = subprocess.Popen(cmd)
except FileNotFoundError:
print("❌ Ошибка: mpg123 не найден. Установите его: sudo apt install mpg123")
return
try:
# Listen for stop command using local Vosk
# Loop until stop word is heard
stop_words = ["стоп", "хватит", "тихо", "замолчи", "отмена", "александр стоп"]
while True:
# Listen in short bursts to be responsive
text = listen_for_keywords(stop_words, timeout=3.0)
if text:
print(f"🛑 Будильник остановлен по команде: '{text}'")
break
except Exception as e:
print(f"❌ Ошибка во время будильника: {e}")
finally:
# Kill the player
process.terminate()
try:
process.wait(timeout=1)
except subprocess.TimeoutExpired:
process.kill()
print("🔕 Будильник выключен.")
def parse_command(self, text: str) -> str | None:
"""
Parse user text for alarm commands.
Returns response string if command handled, None otherwise.
"""
text = text.lower()
if "будильник" not in text and "разбуди" not in text:
return None
if "отмени" in text:
self.cancel_all_alarms()
return "Хорошо, я отменил все будильники."
# Regex to find time: HH:MM, HH-MM, HH MM, HH часов MM минут
# 1. "07:30", "7:30"
match = re.search(r'\b(\d{1,2})[:.-](\d{2})\b', text)
if match:
h, m = int(match.group(1)), int(match.group(2))
if 0 <= h <= 23 and 0 <= m <= 59:
self.add_alarm(h, m)
return f"Я установил будильник на {h} часов {m} минут."
# 2. "7 часов 30 минут" or "7 30"
# Search for pattern: digits ... (digits)?
# Complex to separate from other numbers.
# Simple heuristics:
words = text.split()
nums = [int(s) for s in text.split() if s.isdigit()]
# "на 7" -> 7:00
if "на" in words or "в" in words:
# Try to find number after preposition
pass
# Let's rely on explicit digit search if regex failed
# Patterns: "на 8", "на 8 30", "на 8 часов 30 минут", "на 8 часов"
# Regex to capture hour and optional minute
# Matches: "на <H> [часов] [M] [минут]"
match_time = re.search(r'на\s+(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?', text)
if match_time:
h = int(match_time.group(1))
m = int(match_time.group(2)) if match_time.group(2) else 0
# Handle AM/PM if specified
if "вечера" in text and h < 12:
h += 12
elif "утра" in text and h == 12:
h = 0
if 0 <= h <= 23 and 0 <= m <= 59:
self.add_alarm(h, m)
return f"Хорошо, разбужу вас в {h}:{m:02d}."
return "Я не понял время для будильника. Пожалуйста, скажите точное время, например 'семь тридцать'."
# Process-wide AlarmClock instance, created on first request.
_alarm_clock = None


def get_alarm_clock():
    """Return the shared AlarmClock, constructing it on the first call."""
    global _alarm_clock
    if _alarm_clock is not None:
        return _alarm_clock
    _alarm_clock = AlarmClock()
    return _alarm_clock

12
alarms.json Normal file
View File

@@ -0,0 +1,12 @@
[
{
"hour": 10,
"minute": 15,
"active": true
},
{
"hour": 3,
"minute": 42,
"active": false
}
]

View File

@@ -3,6 +3,7 @@ Response cleaner module.
Removes markdown formatting and special characters from AI responses.
Handles complex number-to-text conversion for Russian language.
"""
import re
import pymorphy3
from num2words import num2words
@@ -12,79 +13,86 @@ morph = pymorphy3.MorphAnalyzer()
# Preposition to case mapping (simplified heuristics)
PREPOSITION_CASES = {
'в': 'loct', # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
'во': 'loct',
'на': 'accs', # Dates: 'na 5 maya' -> Accusative (na pyatoe)
'о': 'loct',
'об': 'loct',
'обо': 'loct',
'при': 'loct',
'у': 'gent',
'от': 'gent',
'до': 'gent',
'из': 'gent',
'с': 'gent', # or ablt (instrumental)
'со': 'gent',
'без': 'gent',
'для': 'gent',
'вокруг': 'gent',
'после': 'gent',
'к': 'datv',
'ко': 'datv',
'по': 'datv', # or accs for dates (limit). Heuristic: datv defaults usually.
'над': 'ablt',
'под': 'ablt',
'перед': 'ablt',
'за': 'ablt', # or acc
'между': 'ablt',
"в": "loct", # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
"во": "loct",
"на": "accs", # Dates: 'na 5 maya' -> Accusative (na pyatoe)
"о": "loct",
"об": "loct",
"обо": "loct",
"при": "loct",
"у": "gent",
"от": "gent",
"до": "gent",
"из": "gent",
"с": "gent", # or ablt (instrumental)
"со": "gent",
"без": "gent",
"для": "gent",
"вокруг": "gent",
"после": "gent",
"к": "datv",
"ко": "datv",
"по": "datv", # or accs for dates (limit). Heuristic: datv defaults usually.
"над": "ablt",
"под": "ablt",
"перед": "ablt",
"за": "ablt", # or acc
"между": "ablt",
}
# Mapping pymorphy cases to num2words cases
PYMORPHY_TO_NUM2WORDS = {
'nomn': 'nominative',
'gent': 'genitive',
'datv': 'dative',
'accs': 'accusative',
'ablt': 'instrumental',
'loct': 'prepositional',
'voct': 'nominative', # Fallback
'gen2': 'genitive',
'acc2': 'accusative',
'loc2': 'prepositional',
"nomn": "nominative",
"gent": "genitive",
"datv": "dative",
"accs": "accusative",
"ablt": "instrumental",
"loct": "prepositional",
"voct": "nominative", # Fallback
"gen2": "genitive",
"acc2": "accusative",
"loc2": "prepositional",
}
# Month names in Genitive case (as they appear in dates)
MONTHS_GENITIVE = [
'января', 'февраля', 'марта', 'апреля', 'мая', 'июня',
'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'
"января",
"февраля",
"марта",
"апреля",
"мая",
"июня",
"июля",
"августа",
"сентября",
"октября",
"ноября",
"декабря",
]
def get_case_from_preposition(prep_token):
    """Look up the pymorphy case tag for a preposition.

    The token is lower-cased before the lookup in PREPOSITION_CASES.
    Returns None for an empty/missing token or an unknown preposition.
    """
    if prep_token:
        return PREPOSITION_CASES.get(prep_token.lower())
    return None
def convert_number(number_str, context_type='cardinal', case='nominative', gender='m'):
def convert_number(number_str, context_type="cardinal", case="nominative", gender="m"):
"""Convert a number string to words with specific parameters."""
try:
# Handle floats
if '.' in number_str or ',' in number_str:
num_val = float(number_str.replace(',', '.'))
if "." in number_str or "," in number_str:
num_val = float(number_str.replace(",", "."))
else:
num_val = int(number_str)
return num2words(
num_val,
lang='ru',
to=context_type,
case=case,
gender=gender
)
return num2words(num_val, lang="ru", to=context_type, case=case, gender=gender)
except Exception as e:
print(f"Error converting number {number_str}: {e}")
return number_str
def numbers_to_words(text: str) -> str:
"""
Intelligent conversion of digits in text to Russian words.
@@ -96,33 +104,39 @@ def numbers_to_words(text: str) -> str:
# 1. Identify "Year" patterns: "1999 год", "в 2024 году"
def replace_year_match(match):
full_str = match.group(0)
prep = match.group(1) # Could be None
prep = match.group(1) # Could be None
year_str = match.group(2)
year_word = match.group(3) # год, году, года...
year_word = match.group(3) # год, году, года...
parsed = morph.parse(year_word)[0]
case_tag = parsed.tag.case
if prep and prep.strip().lower() in ['в', 'во'] and case_tag in ['accs', 'nomn']:
pass
if (
prep
and prep.strip().lower() in ["в", "во"]
and case_tag in ["accs", "nomn"]
):
pass
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative')
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, "nominative")
words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m')
words = convert_number(
year_str, context_type="ordinal", case=nw_case, gender="m"
)
prefix = f"{prep} " if prep else ""
return f"{prefix}{words} {year_word}"
text = re.sub(
r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b',
r"(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b",
replace_year_match,
text
text,
)
# 2. Identify "Date" patterns: "25 июня", "с 1 мая"
# Matches: (Preposition)? (Day) (Month_Genitive)
# Day is usually 1-31.
month_regex = '|'.join(MONTHS_GENITIVE)
month_regex = "|".join(MONTHS_GENITIVE)
def replace_date_match(match):
prep = match.group(1)
@@ -131,24 +145,24 @@ def numbers_to_words(text: str) -> str:
# Determine case
# Default to Genitive ("25 июня" -> "двадцать пятого июня")
case = 'genitive'
case = "genitive"
if prep:
prep_clean = prep.strip().lower()
# Specific overrides for dates
if prep_clean == 'на':
case = 'accusative' # на 5 мая -> на пятое
elif prep_clean == 'по':
case = 'accusative' # по 5 мая -> по пятое (limit)
elif prep_clean == 'к':
case = 'dative' # к 5 мая -> к пятому
elif prep_clean in ['с', 'до', 'от']:
case = 'genitive' # с 5 мая -> с пятого
if prep_clean == "на":
case = "accusative" # на 5 мая -> на пятое
elif prep_clean == "по":
case = "accusative" # по 5 мая -> по пятое (limit)
elif prep_clean == "к":
case = "dative" # к 5 мая -> к пятому
elif prep_clean in ["с", "до", "от"]:
case = "genitive" # с 5 мая -> с пятого
else:
# Fallback to general preposition map
morph_case = get_case_from_preposition(prep_clean)
if morph_case:
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive')
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "genitive")
# Convert to Ordinal
# Dates are neuter ("число" implies neuter: "пятое", "пятого")
@@ -157,15 +171,15 @@ def numbers_to_words(text: str) -> str:
# 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?)
# Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому).
words = convert_number(day_str, context_type='ordinal', case=case, gender='n')
words = convert_number(day_str, context_type="ordinal", case=case, gender="n")
prefix = f"{prep} " if prep else ""
return f"{prefix}{words} {month_word}"
text = re.sub(
r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b',
r"(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(" + month_regex + r")\b",
replace_date_match,
text
text,
)
# 3. Handle remaining numbers (Cardinals)
@@ -173,32 +187,33 @@ def numbers_to_words(text: str) -> str:
prep = match.group(1)
num_str = match.group(2)
case = 'nominative'
case = "nominative"
if prep:
morph_case = get_case_from_preposition(prep.strip())
if morph_case:
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative')
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "nominative")
words = convert_number(num_str, context_type='cardinal', case=case)
words = convert_number(num_str, context_type="cardinal", case=case)
prefix = f"{prep} " if prep else ""
return f"{prefix}{words}"
text = re.sub(
r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b',
r"(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b",
replace_cardinal_match,
text
text,
)
return text
def clean_response(text: str) -> str:
def clean_response(text: str, language: str = "ru") -> str:
"""
Clean AI response from markdown formatting and special characters.
Args:
text: Raw AI response with possible markdown
language: Target language for output (affects post-processing)
Returns:
Clean text suitable for TTS
@@ -208,58 +223,64 @@ def clean_response(text: str) -> str:
# Remove citation references like [1], [2], [citation], etc.
# Using hex escapes for brackets to avoid escaping issues
text = re.sub(r'\x5B\d+\x5D', '', text)
text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE)
text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE)
text = re.sub(r"\x5B\d+\x5D", "", text)
text = re.sub(r"\x5Bcitation\s*needed\x5D", "", text, flags=re.IGNORECASE)
text = re.sub(r"\x5Bsource\x5D", "", text, flags=re.IGNORECASE)
# Remove markdown bold **text** and __text__
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
text = re.sub(r'__(.+?)__', r'\1', text)
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
text = re.sub(r"__(.+?)__", r"\1", text)
# Remove markdown italic *text* and _text_
text = re.sub(r'\*(.+?)\*', r'\1', text)
text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)
text = re.sub(r"\*(.+?)\*", r"\1", text)
text = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"\1", text)
# Remove markdown strikethrough ~~text~~
text = re.sub(r'~~(.+?)~~', r'\1', text)
text = re.sub(r"~~(.+?)~~", r"\1", text)
# Remove markdown headers # ## ### etc.
text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
text = re.sub(r"^#{1,6}\s*", "", text, flags=re.MULTILINE)
# Remove markdown links [text](url) -> text
text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text)
text = re.sub(r"\x5B([^\x5D]+)\x5D\([^)]+\)", r"\1", text)
# Remove markdown images ![alt](url)
text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text)
text = re.sub(r"!\x5B([^\x5D]*)\x5D\([^)]+\)", "", text)
# Remove inline code `code`
text = re.sub(r'`([^`]+)`', r'\1', text)
text = re.sub(r"`([^`]+)`", r"\1", text)
# Remove code blocks ```code```
text = re.sub(r'```[\s\S]*?```', '', text)
text = re.sub(r"```[\s\S]*?```", "", text)
# Remove markdown list markers (-, *, +, numbered)
text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE)
text = re.sub(r"^\s*\d+\.\s+", "", text, flags=re.MULTILINE)
# Remove blockquotes
text = re.sub(r'^\s*>\s*', '', text, flags=re.MULTILINE)
text = re.sub(r"^\s*>\s*", "", text, flags=re.MULTILINE)
# Remove horizontal rules
text = re.sub(r'^[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
# Remove HTML tags if any
text = re.sub(r'<[^>]+>', '', text)
text = re.sub(r"<[^>]+>", "", text)
# Remove informal slang greetings at the beginning of sentences/responses
text = re.sub(r'^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)[,!?:]?\s*', '', text, flags=re.IGNORECASE | re.MULTILINE)
text = re.sub(
r"^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)[,!?:]?\s*",
"",
text,
flags=re.IGNORECASE | re.MULTILINE,
)
# Convert numbers to words (Russian)
text = numbers_to_words(text)
# Convert numbers to words only for Russian, and only if digits exist
if language == "ru" and re.search(r"\d", text):
text = numbers_to_words(text)
# Remove extra whitespace
text = re.sub(r'\n{3,}', '\n\n', text)
text = re.sub(r' +', ' ', text)
text = re.sub(r"\n{3,}", "\n\n", text)
text = re.sub(r" +", " ", text)
# Clean up and return
text = text.strip()

View File

@@ -2,6 +2,7 @@
Configuration module for smart speaker.
Loads environment variables from .env file.
"""
import os
from pathlib import Path
from dotenv import load_dotenv
@@ -31,6 +32,13 @@ VOSK_MODEL_PATH = BASE_DIR / "vosk-model-ru-0.42"
SAMPLE_RATE = 16000
CHANNELS = 1
# Set timezone to Moscow
import time
os.environ["TZ"] = "Europe/Moscow"
time.tzset()
# TTS configuration
TTS_SPEAKER = "eugene" # Available: aidar, baya, kseniya, xenia, eugene
TTS_SPEAKER = "eugene" # Available (ru): aidar, baya, kseniya, xenia, eugene
TTS_EN_SPEAKER = os.getenv("TTS_EN_SPEAKER", "en_0")
TTS_SAMPLE_RATE = 48000

116
local_stt.py Normal file
View File

@@ -0,0 +1,116 @@
"""
Local offline Speech-to-Text module using Vosk.
Used for simple command detection (like "stop") without internet.
"""
import os
import sys
import json
import pyaudio
from vosk import Model, KaldiRecognizer
from config import VOSK_MODEL_PATH, SAMPLE_RATE
class LocalRecognizer:
    """Offline keyword spotter: a Vosk model fed from a PyAudio microphone stream."""

    def __init__(self):
        self.model = None
        self.rec = None
        self.pa = None
        # Not used by the methods below (streams are opened per call);
        # kept so the attribute remains available to existing callers.
        self.stream = None

    def _load_model_quietly(self):
        """Load the Vosk model with fd 2 (stderr) pointed at the null device.

        stderr is restored on every path, including when loading fails.
        Returns True on success, False on failure.
        """
        saved_stderr = None
        try:
            sys.stderr.flush()
            saved_stderr = os.dup(2)
            null_fd = os.open(os.devnull, os.O_WRONLY)
            try:
                os.dup2(null_fd, 2)
            finally:
                os.close(null_fd)
            self.model = Model(str(VOSK_MODEL_PATH))
            return True
        except Exception as e:
            print(f"Error initializing Vosk: {e}")
            return False
        finally:
            # Runs before either return above takes effect.
            if saved_stderr is not None:
                os.dup2(saved_stderr, 2)
                os.close(saved_stderr)

    def initialize(self):
        """Create whichever of model / recognizer / audio backend is missing.

        Returns:
            True when everything is ready, False if the model directory is
            missing or the model fails to load.
        """
        if self.model is None:
            if not os.path.exists(VOSK_MODEL_PATH):
                print(f"❌ Ошибка: Vosk модель не найдена по пути {VOSK_MODEL_PATH}")
                return False

            print("📦 Инициализация локального STT (Vosk)...")
            if not self._load_model_quietly():
                return False

        if self.rec is None:
            self.rec = KaldiRecognizer(self.model, SAMPLE_RATE)
        if self.pa is None:
            self.pa = pyaudio.PyAudio()
        return True

    def listen_for_keywords(self, keywords: list, timeout: float = 10.0) -> str:
        """
        Listen for specific keywords locally.

        Args:
            keywords: Substrings to look for in the recognized text.
            timeout: Maximum listening time in seconds.

        Returns:
            The recognized text (final or partial) that contains one of the
            keywords, or an empty string on timeout or failure.
        """
        import time

        if self.model is None or self.rec is None or self.pa is None:
            if not self.initialize():
                return ""

        # Open stream
        stream = None
        try:
            stream = self.pa.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=SAMPLE_RATE,
                input=True,
                frames_per_buffer=4096,
            )
            stream.start_stream()
        except Exception as e:
            print(f"❌ Ошибка микрофона: {e}")
            if stream is not None:
                # Opened but failed to start: do not leak the stream.
                stream.close()
            return ""

        # Drop any partial hypothesis left over from the previous call;
        # otherwise an old "stop" could match on the very first chunk.
        self.rec.Reset()

        deadline = time.monotonic() + timeout
        print(f"👂 Локальное слушание ожидает: {keywords}")

        detected_text = ""
        try:
            while time.monotonic() < deadline:
                data = stream.read(4096, exception_on_overflow=False)
                if self.rec.AcceptWaveform(data):
                    # Final result for a completed utterance.
                    heard = json.loads(self.rec.Result()).get("text", "")
                    if heard:
                        print(f"📝 Локально: {heard}")
                else:
                    # Partial result: react without waiting for the final one.
                    heard = json.loads(self.rec.PartialResult()).get("partial", "")

                if heard and any(kw in heard for kw in keywords):
                    detected_text = heard
                    break
        finally:
            stream.stop_stream()
            stream.close()

        return detected_text

    def cleanup(self):
        """Release the PyAudio backend; safe to call more than once."""
        if self.pa:
            self.pa.terminate()
            self.pa = None
# Process-wide LocalRecognizer instance, created on first request.
_local_recognizer = None


def get_local_recognizer():
    """Return the shared LocalRecognizer, constructing it on the first call."""
    global _local_recognizer
    if _local_recognizer is not None:
        return _local_recognizer
    _local_recognizer = LocalRecognizer()
    return _local_recognizer
def listen_for_keywords(keywords: list, timeout: float = 5.0) -> str:
    """Module-level shortcut: run keyword listening on the shared recognizer."""
    recognizer = get_local_recognizer()
    return recognizer.listen_for_keywords(keywords, timeout)

167
main.py
View File

@@ -13,14 +13,22 @@ Flow:
import signal
import sys
import re
import threading
from collections import deque
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
from wakeword import (
wait_for_wakeword,
cleanup as cleanup_wakeword,
check_wakeword_once,
stop_monitoring as stop_wakeword_monitoring,
)
from stt import listen, cleanup as cleanup_stt, get_recognizer
from ai import ask_ai
from ai import ask_ai, translate_text
from cleaner import clean_response
from tts import speak, initialize as init_tts
from sound_level import set_volume, parse_volume_text
from alarm import get_alarm_clock
def signal_handler(sig, frame):
@@ -31,6 +39,37 @@ def signal_handler(sig, frame):
sys.exit(0)
# After the command phrase: a word boundary, optional separators
# (whitespace, colon, comma, dash), then the text to translate.
_TRANSLATION_TAIL = r"\b[\s:,—-]*(.*)$"

# (compiled pattern, source language, target language); built once at import.
_TRANSLATION_PATTERNS = tuple(
    (re.compile(prefix + _TRANSLATION_TAIL, re.IGNORECASE | re.DOTALL), source, target)
    for prefix, source, target in (
        (r"^переведи на английский(?:\s+язык)?", "ru", "en"),
        (r"^переведи на русский(?:\s+язык)?", "en", "ru"),
        (r"^переведи с английского(?:\s+языка)?", "en", "ru"),
        (r"^переведи с русского(?:\s+языка)?", "ru", "en"),
        (r"^как по[-\s]?английски", "ru", "en"),
        (r"^как по[-\s]?русски", "en", "ru"),
        (r"^translate (?:to|into) english", "ru", "en"),
        (r"^translate (?:to|into) russian", "en", "ru"),
        (r"^translate from english", "en", "ru"),
        (r"^translate from russian", "ru", "en"),
    )
)


def parse_translation_request(text: str):
    """
    Detect translation commands and extract language direction and text.

    The command must start the utterance. An optional "язык"/"языка" after
    the language name and separator punctuation after the command are not
    treated as part of the text to translate.

    Args:
        text: Recognized user utterance.

    Returns:
        dict with source_lang, target_lang, text (possibly empty when the
        user gave only the command), or None if this is not a translation
        command.
    """
    if not text:
        return None

    candidate = text.strip()
    for pattern, source_lang, target_lang in _TRANSLATION_PATTERNS:
        match = pattern.match(candidate)
        if match:
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": match.group(1).strip(),
            }
    return None
def main():
"""Main application loop."""
print("=" * 50)
@@ -46,8 +85,31 @@ def main():
# Pre-initialize models (takes a few seconds)
print("⏳ Инициализация моделей...")
get_recognizer().initialize() # Initialize STT model first
init_tts() # Then initialize TTS model
init_errors = []
def init_stt():
try:
get_recognizer().initialize()
except Exception as e:
init_errors.append(e)
def init_tts_model():
try:
init_tts()
except Exception as e:
init_errors.append(e)
stt_thread = threading.Thread(target=init_stt, daemon=True)
tts_thread = threading.Thread(target=init_tts_model, daemon=True)
stt_thread.start()
tts_thread.start()
stt_thread.join()
tts_thread.join()
if init_errors:
raise init_errors[0]
alarm_clock = get_alarm_clock() # Initialize Alarm Clock
print()
# Initialize chat history (last 10 exchanges = 20 messages)
@@ -57,37 +119,58 @@ def main():
skip_wakeword = False
while True:
try:
# Ensure wake word detector stream is closed before listening
stop_wakeword_monitoring()
# Check for alarms every loop iteration
if alarm_clock.check_alarms():
# If alarm triggered and finished (user stopped it), we continue loop
# The alarm.trigger_alarm() blocks until stopped.
skip_wakeword = False # Reset state after alarm
continue
# Step 1: Wait for wake word or Follow-up listen
if not skip_wakeword:
wait_for_wakeword()
# Wait with timeout to allow alarm checking
detected = wait_for_wakeword(timeout=1.0)
# If timeout (not detected), loop again to check alarms
if not detected:
continue
# Standard listen after activation
user_text = listen(timeout_seconds=7.0)
else:
# Follow-up listen (wait 2.0s for start, then listen long)
print("👂 Слушаю продолжение диалога...")
user_text = listen(timeout_seconds=20.0, detection_timeout=2.0)
# Follow-up listen (wait 5.0s for start)
print("👂 Слушаю продолжение диалога (5 сек)...")
user_text = listen(timeout_seconds=10.0, detection_timeout=5.0)
if not user_text:
# User didn't continue conversation, go back to sleep
# User didn't continue conversation, go back to sleep silently
skip_wakeword = False
continue
# Reset flag for now (will be set to True if we speak successfully)
skip_wakeword = False
# Step 2: Check if speech was recognized
if not user_text:
# If this was a direct wake word activation but no speech
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
skip_wakeword = False # Reset to wake word
continue
# Check for stop commands
user_text_lower = user_text.lower().strip()
if user_text_lower in ["стоп", "александр", "стоп александр"]:
if user_text_lower in ["стоп", "александр", "стоп александр", "хватит"]:
print("_" * 50)
print("💤 Жду 'Alexandr' для активации...")
skip_wakeword = False
continue
# Check for alarm commands
alarm_response = alarm_clock.parse_command(user_text)
if alarm_response:
speak(alarm_response)
continue
# Check for volume command
if user_text.lower().startswith("громкость"):
try:
@@ -113,6 +196,46 @@ def main():
speak("Не удалось изменить громкость.")
continue
# Check for translation commands
translation_request = parse_translation_request(user_text)
if translation_request:
source_lang = translation_request["source_lang"]
target_lang = translation_request["target_lang"]
text_to_translate = translation_request["text"]
if not text_to_translate:
prompt = (
"Скажи фразу на английском."
if source_lang == "en"
else "Скажи фразу на русском."
)
speak(prompt)
text_to_translate = listen(
timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
)
if not text_to_translate:
speak("Я не расслышал текст для перевода.")
skip_wakeword = False
continue
translated_text = translate_text(
text_to_translate, source_lang, target_lang
)
clean_text = clean_response(translated_text, language=target_lang)
completed = speak(
clean_text,
check_interrupt=check_wakeword_once,
language=target_lang,
)
stop_wakeword_monitoring()
skip_wakeword = True
if not completed:
print("⏹️ Перевод прерван - слушаю следующий вопрос")
continue
# Step 3: Send to AI
# Add user message to history
chat_history.append({"role": "user", "content": user_text})
@@ -124,10 +247,16 @@ def main():
chat_history.append({"role": "assistant", "content": ai_response})
# Step 4: Clean response
clean_text = clean_response(ai_response)
clean_text = clean_response(ai_response, language="ru")
# Step 5: Speak response (with wake word interrupt support)
completed = speak(clean_text, check_interrupt=check_wakeword_once)
# This uses check_wakeword_once which opens/closes stream as needed
completed = speak(
clean_text, check_interrupt=check_wakeword_once, language="ru"
)
# Stop monitoring after TTS finishes (cleanup stream opened by check_wakeword_once)
stop_wakeword_monitoring()
# Enable follow-up mode for next iteration
skip_wakeword = True
@@ -136,7 +265,12 @@ def main():
# but we can print a message
if not completed:
print("⏹️ Ответ прерван - слушаю следующий вопрос")
continue
# If interrupted, we treat it as immediate follow up?
# Usually interruption means "I have a new command"
# So skip_wakeword = True is correct.
# But we might want to listen IMMEDIATELY without waiting 5s for start?
# listen() handles that.
pass
print()
print("-" * 30)
@@ -149,6 +283,7 @@ def main():
except Exception as e:
print(f"❌ Ошибка: {e}")
speak("Произошла ошибка. Попробуйте ещё раз.")
skip_wakeword = False
if __name__ == "__main__":

30
stt.py
View File

@@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API.
Recognizes speech from microphone using streaming WebSocket.
Supports Russian (default) and English.
"""
import os
import asyncio
import threading
@@ -20,6 +21,7 @@ from deepgram import (
# Configure logging to suppress debug noise
logging.getLogger("deepgram").setLevel(logging.WARNING)
class SpeechRecognizer:
"""Speech recognizer using Deepgram streaming."""
@@ -59,6 +61,7 @@ class SpeechRecognizer:
async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
"""Async loop to send audio and wait for results."""
self.transcript = ""
transcript_parts = []
loop = asyncio.get_running_loop()
stream = self._get_stream()
@@ -74,9 +77,11 @@ class SpeechRecognizer:
if len(sentence) == 0:
return
if result.is_final:
print(f"📝 Частичный результат: {sentence}")
with speech_recognizer_self.lock:
speech_recognizer_self.transcript = sentence
transcript_parts.append(sentence)
speech_recognizer_self.transcript = " ".join(
transcript_parts
).strip()
def on_speech_started(unused_self, speech_started, **kwargs):
loop.call_soon_threadsafe(speech_started_event.set)
@@ -102,7 +107,7 @@ class SpeechRecognizer:
channels=1,
sample_rate=SAMPLE_RATE,
interim_results=True,
utterance_end_ms="1200",
utterance_end_ms=1200,
vad_events=True,
)
@@ -138,7 +143,9 @@ class SpeechRecognizer:
# 1. Wait for speech to start (detection_timeout)
if detection_timeout:
try:
await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout)
await asyncio.wait_for(
speech_started_event.wait(), timeout=detection_timeout
)
except asyncio.TimeoutError:
# print("Detection timeout - no speech")
stop_event.set()
@@ -158,7 +165,12 @@ class SpeechRecognizer:
return self.transcript
def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
def listen(
self,
timeout_seconds: float = 7.0,
detection_timeout: float = None,
lang: str = "ru",
) -> str:
"""
Listen to microphone and transcribe speech.
"""
@@ -172,7 +184,9 @@ class SpeechRecognizer:
dg_connection = self.dg_client.listen.live.v("1")
try:
transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout))
transcript = asyncio.run(
self._process_audio(dg_connection, timeout_seconds, detection_timeout)
)
final_text = transcript.strip() if transcript else ""
if final_text:
@@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer:
return _recognizer
def listen(timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
def listen(
timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru"
) -> str:
"""Listen to microphone and return transcribed text."""
return get_recognizer().listen(timeout_seconds, detection_timeout, lang)

70
tts.py
View File

@@ -11,7 +11,7 @@ import threading
import time
import warnings
import re
from config import TTS_SPEAKER, TTS_SAMPLE_RATE
from config import TTS_SPEAKER, TTS_EN_SPEAKER, TTS_SAMPLE_RATE
# Suppress Silero TTS warning about text length
warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols")
@@ -21,27 +21,55 @@ class TextToSpeech:
"""Text-to-Speech using Silero TTS with wake word interruption support."""
def __init__(self):
self.model = None
self.models = {}
self.sample_rate = TTS_SAMPLE_RATE
self.speaker = TTS_SPEAKER
self.speakers = {
"ru": TTS_SPEAKER,
"en": TTS_EN_SPEAKER,
}
self._interrupted = False
self._stop_flag = threading.Event()
def initialize(self):
"""Initialize Silero TTS model."""
print("📦 Загрузка модели Silero TTS v5...")
def _load_model(self, language: str):
"""Load and cache Silero TTS model for the given language."""
if language in self.models:
return self.models[language]
# Load Silero TTS model
device = torch.device('cpu')
self.model, _ = torch.hub.load(
model_config = {
"ru": {"language": "ru", "model_id": "v5_ru"},
"en": {"language": "en", "model_id": "v3_en"},
}
if language not in model_config:
raise ValueError(f"Unsupported TTS language: {language}")
config = model_config[language]
print(f"📦 Загрузка модели Silero TTS ({language})...")
device = torch.device("cpu")
model, _ = torch.hub.load(
repo_or_dir="snakers4/silero-models",
model="silero_tts",
language="ru",
speaker="v5_ru",
language=config["language"],
speaker=config["model_id"],
)
self.model.to(device)
model.to(device)
print(f"✅ Модель TTS v5 загружена (голос: {self.speaker})")
self.models[language] = model
return model
def _get_speaker(self, language: str, model) -> str:
"""Return a valid speaker for the loaded model."""
speaker = self.speakers.get(language)
if hasattr(model, "speakers") and speaker not in model.speakers:
fallback = model.speakers[0] if model.speakers else speaker
print(f"⚠️ Голос '{speaker}' недоступен, использую '{fallback}'")
return fallback
return speaker
def initialize(self) -> None:
    """Load the default TTS model (language code "ru").

    Delegates to ``self._load_model``; its return value is discarded.
    """
    default_language = "ru"
    self._load_model(default_language)
def _split_text(self, text: str, max_length: int = 900) -> list[str]:
"""Split text into chunks smaller than max_length."""
@@ -83,13 +111,14 @@ class TextToSpeech:
# Filter empty chunks
return [c for c in chunks if c]
def speak(self, text: str, check_interrupt=None) -> bool:
def speak(self, text: str, check_interrupt=None, language: str = "ru") -> bool:
"""
Convert text to speech and play it.
Args:
text: Text to synthesize and speak
check_interrupt: Optional callback function that returns True if playback should stop
language: Language code for voice selection ("ru" or "en")
Returns:
True if playback completed normally, False if interrupted
@@ -97,8 +126,8 @@ class TextToSpeech:
if not text.strip():
return True
if not self.model:
self.initialize()
model = self._load_model(language)
speaker = self._get_speaker(language, model)
# Split text into manageable chunks
chunks = self._split_text(text)
@@ -120,8 +149,8 @@ class TextToSpeech:
try:
# Generate audio for chunk
audio = self.model.apply_tts(
text=chunk, speaker=self.speaker, sample_rate=self.sample_rate
audio = model.apply_tts(
text=chunk, speaker=speaker, sample_rate=self.sample_rate
)
# Convert to numpy array
@@ -218,18 +247,19 @@ def get_tts() -> TextToSpeech:
return _tts
def speak(text: str, check_interrupt=None) -> bool:
def speak(text: str, check_interrupt=None, language: str = "ru") -> bool:
"""
Synthesize and speak the given text.
Args:
text: Text to speak
check_interrupt: Optional callback for interrupt checking
language: Language code for voice selection ("ru" or "en")
Returns:
True if completed normally, False if interrupted
"""
return get_tts().speak(text, check_interrupt)
return get_tts().speak(text, check_interrupt, language)
def was_interrupted() -> bool:

View File

@@ -15,6 +15,7 @@ class WakeWordDetector:
self.porcupine = None
self.audio_stream = None
self.pa = None
self._stream_closed = True # Track state explicitly
def initialize(self):
"""Initialize Porcupine and audio stream."""
@@ -24,6 +25,19 @@ class WakeWordDetector:
)
self.pa = pyaudio.PyAudio()
self._open_stream()
print("🎤 Ожидание wake word 'Alexandr'...")
def _open_stream(self):
"""Open the audio stream."""
if self.audio_stream and not self._stream_closed:
return
if self.audio_stream:
try:
self.audio_stream.close()
except: pass
self.audio_stream = self.pa.open(
rate=self.porcupine.sample_rate,
channels=1,
@@ -31,44 +45,47 @@ class WakeWordDetector:
input=True,
frames_per_buffer=self.porcupine.frame_length
)
print("🎤 Ожидание wake word 'Alexandr'...")
self._stream_closed = False
def wait_for_wakeword(self) -> bool:
def stop_monitoring(self):
    """Stop and close the audio stream, then mark it as closed.

    Does nothing to the stream when there is none or when it is already
    flagged as closed. Teardown is best-effort: a failure in either step is
    ignored so that callers can always proceed, and ``self._stream_closed``
    is set to ``True`` in every case.
    """
    stream = self.audio_stream
    if stream and not self._stream_closed:
        # Stop and close are attempted independently: if stopping fails,
        # the stream must still be closed or the device handle leaks.
        try:
            stream.stop_stream()
        except Exception:
            # Best-effort; KeyboardInterrupt/SystemExit still propagate.
            pass
        try:
            stream.close()
        except Exception:
            pass
    self._stream_closed = True
def wait_for_wakeword(self, timeout: float = None) -> bool:
"""
Blocks until wake word is detected.
Returns True when wake word is detected.
Blocks until wake word is detected or timeout expires.
Args:
timeout: Maximum seconds to wait. None = infinite.
Returns:
True if wake word detected, False if timeout.
"""
import time
if not self.porcupine:
self.initialize()
# Ensure stream is open and active
if self.audio_stream is None or not self.audio_stream.is_active():
# If closed or None, we might need to recreate it.
# PyAudio streams once closed cannot be reopened usually?
# We should probably recreate it.
if self.audio_stream:
try:
self.audio_stream.close()
except: pass
# Ensure stream is open
self._open_stream()
self.audio_stream = self.pa.open(
rate=self.porcupine.sample_rate,
channels=1,
format=pyaudio.paInt16,
input=True,
frames_per_buffer=self.porcupine.frame_length
)
start_time = time.time()
while True:
if timeout and (time.time() - start_time > timeout):
return False
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
keyword_index = self.porcupine.process(pcm)
if keyword_index >= 0:
print("✅ Wake word обнаружен!")
# Stop and CLOSE stream to release mic for STT
self.audio_stream.stop_stream()
self.audio_stream.close()
self.stop_monitoring()
return True
def check_wakeword_once(self) -> bool:
@@ -80,20 +97,8 @@ class WakeWordDetector:
self.initialize()
try:
# Ensure stream is open/active
if self.audio_stream is None or not self.audio_stream.is_active():
# Re-open if needed (similar to wait_for_wakeword logic)
if self.audio_stream:
try:
self.audio_stream.close()
except: pass
self.audio_stream = self.pa.open(
rate=self.porcupine.sample_rate,
channels=1,
format=pyaudio.paInt16,
input=True,
frames_per_buffer=self.porcupine.frame_length
)
# Ensure stream is open
self._open_stream()
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
@@ -108,8 +113,7 @@ class WakeWordDetector:
def cleanup(self):
"""Release resources."""
if self.audio_stream:
self.audio_stream.close()
self.stop_monitoring()
if self.pa:
self.pa.terminate()
if self.porcupine:
@@ -128,10 +132,14 @@ def get_detector() -> WakeWordDetector:
return _detector
def wait_for_wakeword() -> bool:
def wait_for_wakeword(timeout: float = None) -> bool:
"""Wait for wake word detection."""
return get_detector().wait_for_wakeword()
return get_detector().wait_for_wakeword(timeout)
def stop_monitoring():
    """Stop wake-word monitoring on the module-level detector, if one is set."""
    if not _detector:
        # No detector instance is set, so there is nothing to stop.
        return
    _detector.stop_monitoring()
def cleanup():
"""Cleanup detector resources."""