translator но без озвучивания слов на английском
This commit is contained in:
BIN
Apex-1.mp3
Normal file
BIN
Apex-1.mp3
Normal file
Binary file not shown.
67
ai.py
67
ai.py
@@ -16,6 +16,10 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
|
||||
Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов.
|
||||
ВАЖНО: Не используй в ответах панибратские или сленговые приветствия и обращения, такие как "Эй", "Хэй", "Слушай" в начале фразы и подобные."""
|
||||
|
||||
TRANSLATION_SYSTEM_PROMPT = """You are a translation engine.
|
||||
Translate from {source} to {target}.
|
||||
Return only the translated text, without quotes, comments, or explanations."""
|
||||
|
||||
|
||||
def ask_ai(messages_history: list) -> str:
|
||||
"""
|
||||
@@ -72,3 +76,66 @@ def ask_ai(messages_history: list) -> str:
|
||||
except (KeyError, IndexError) as e:
|
||||
print(f"❌ Ошибка парсинга ответа: {e}")
|
||||
return "Не удалось обработать ответ от AI."
|
||||
|
||||
|
||||
def translate_text(text: str, source_lang: str, target_lang: str) -> str:
    """
    Translate text using Perplexity AI.

    Args:
        text: Text to translate. Empty or whitespace-only input is rejected
            before any network request is made.
        source_lang: Source language code ("ru" or "en"). Unknown codes are
            inserted into the prompt unchanged.
        target_lang: Target language code ("ru" or "en"). Unknown codes are
            inserted into the prompt unchanged.

    Returns:
        The translated text with surrounding whitespace removed, or a Russian
        apology message when the input is empty, the request fails, or the
        response cannot be parsed.
    """
    # Whitespace-only input carries nothing to translate; treat it like "".
    text = text.strip() if text else ""
    if not text:
        return "Извините, я не расслышал текст для перевода."

    lang_names = {"ru": "Russian", "en": "English"}
    source_name = lang_names.get(source_lang, source_lang)
    target_name = lang_names.get(target_lang, target_lang)

    print(f"🌍 Перевод: {source_name} -> {target_name}: {text[:60]}...")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    messages = [
        {
            "role": "system",
            "content": TRANSLATION_SYSTEM_PROMPT.format(
                source=source_name, target=target_name
            ),
        },
        {"role": "user", "content": text},
    ]

    payload = {
        "model": PERPLEXITY_MODEL,
        "messages": messages,
        "max_tokens": 400,
        # Low temperature keeps the translation close to literal.
        "temperature": 0.2,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()

        data = response.json()
        ai_response = data["choices"][0]["message"]["content"]
        return ai_response.strip()

    except requests.exceptions.Timeout:
        return "Извините, сервер не отвечает. Попробуйте позже."
    except requests.exceptions.RequestException as e:
        print(f"❌ Ошибка API перевода: {e}")
        return "Произошла ошибка при переводе. Попробуйте ещё раз."
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
        # TypeError/AttributeError: unexpected JSON shape or a null "content";
        # ValueError: body that is not valid JSON.
        print(f"❌ Ошибка парсинга ответа перевода: {e}")
        return "Не удалось обработать перевод."
|
||||
|
||||
194
alarm.py
Normal file
194
alarm.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Alarm clock module.
|
||||
Handles alarm scheduling, persistence, and playback.
|
||||
"""
|
||||
import json
|
||||
import time
|
||||
import subprocess
|
||||
import re
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from config import BASE_DIR
|
||||
from local_stt import listen_for_keywords
|
||||
|
||||
ALARM_FILE = BASE_DIR / "alarms.json"
|
||||
ALARM_SOUND = BASE_DIR / "Apex-1.mp3"
|
||||
|
||||
class AlarmClock:
|
||||
def __init__(self):
|
||||
self.alarms = []
|
||||
self.load_alarms()
|
||||
|
||||
def load_alarms(self):
|
||||
"""Load alarms from JSON file."""
|
||||
if ALARM_FILE.exists():
|
||||
try:
|
||||
with open(ALARM_FILE, "r", encoding="utf-8") as f:
|
||||
self.alarms = json.load(f)
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка загрузки будильников: {e}")
|
||||
self.alarms = []
|
||||
|
||||
def save_alarms(self):
|
||||
"""Save alarms to JSON file."""
|
||||
try:
|
||||
with open(ALARM_FILE, "w", encoding="utf-8") as f:
|
||||
json.dump(self.alarms, f, indent=4)
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка сохранения будильников: {e}")
|
||||
|
||||
def add_alarm(self, hour: int, minute: int):
|
||||
"""Add a new alarm."""
|
||||
# Check if already exists
|
||||
for alarm in self.alarms:
|
||||
if alarm["hour"] == hour and alarm["minute"] == minute:
|
||||
alarm["active"] = True
|
||||
self.save_alarms()
|
||||
return
|
||||
|
||||
self.alarms.append({
|
||||
"hour": hour,
|
||||
"minute": minute,
|
||||
"active": True
|
||||
})
|
||||
self.save_alarms()
|
||||
print(f"⏰ Будильник установлен на {hour:02d}:{minute:02d}")
|
||||
|
||||
def cancel_all_alarms(self):
|
||||
"""Cancel all active alarms."""
|
||||
for alarm in self.alarms:
|
||||
alarm["active"] = False
|
||||
self.save_alarms()
|
||||
print("🔕 Все будильники отменены.")
|
||||
|
||||
def check_alarms(self):
|
||||
"""Check if any alarm should trigger now. Returns True if triggered."""
|
||||
now = datetime.now()
|
||||
triggered = False
|
||||
|
||||
for alarm in self.alarms:
|
||||
if alarm["active"]:
|
||||
if alarm["hour"] == now.hour and alarm["minute"] == now.minute:
|
||||
# Prevent re-triggering within the same minute?
|
||||
# We should disable it immediately or track last trigger time.
|
||||
# For simple logic: disable it (one-time alarm).
|
||||
|
||||
# But wait, checking every second?
|
||||
# If I disable it, it won't ring for the whole minute.
|
||||
# Correct.
|
||||
print(f"⏰ ВРЕМЯ БУДИЛЬНИКА: {alarm['hour']:02d}:{alarm['minute']:02d}")
|
||||
alarm["active"] = False
|
||||
triggered = True
|
||||
self.trigger_alarm()
|
||||
break # Trigger one at a time
|
||||
|
||||
if triggered:
|
||||
self.save_alarms()
|
||||
return True
|
||||
return False
|
||||
|
||||
def trigger_alarm(self):
|
||||
"""Play alarm sound and wait for stop command."""
|
||||
print("🔔 БУДИЛЬНИК ЗВОНИТ! (Скажите 'Стоп' или 'Александр стоп')")
|
||||
|
||||
# Start playing sound in loop
|
||||
# -q for quiet (no output)
|
||||
# --loop -1 for infinite loop
|
||||
cmd = ["mpg123", "-q", "--loop", "-1", str(ALARM_SOUND)]
|
||||
|
||||
try:
|
||||
process = subprocess.Popen(cmd)
|
||||
except FileNotFoundError:
|
||||
print("❌ Ошибка: mpg123 не найден. Установите его: sudo apt install mpg123")
|
||||
return
|
||||
|
||||
try:
|
||||
# Listen for stop command using local Vosk
|
||||
# Loop until stop word is heard
|
||||
stop_words = ["стоп", "хватит", "тихо", "замолчи", "отмена", "александр стоп"]
|
||||
|
||||
while True:
|
||||
# Listen in short bursts to be responsive
|
||||
text = listen_for_keywords(stop_words, timeout=3.0)
|
||||
if text:
|
||||
print(f"🛑 Будильник остановлен по команде: '{text}'")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка во время будильника: {e}")
|
||||
finally:
|
||||
# Kill the player
|
||||
process.terminate()
|
||||
try:
|
||||
process.wait(timeout=1)
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
print("🔕 Будильник выключен.")
|
||||
|
||||
def parse_command(self, text: str) -> str | None:
|
||||
"""
|
||||
Parse user text for alarm commands.
|
||||
Returns response string if command handled, None otherwise.
|
||||
"""
|
||||
text = text.lower()
|
||||
if "будильник" not in text and "разбуди" not in text:
|
||||
return None
|
||||
|
||||
if "отмени" in text:
|
||||
self.cancel_all_alarms()
|
||||
return "Хорошо, я отменил все будильники."
|
||||
|
||||
# Regex to find time: HH:MM, HH-MM, HH MM, HH часов MM минут
|
||||
# 1. "07:30", "7:30"
|
||||
match = re.search(r'\b(\d{1,2})[:.-](\d{2})\b', text)
|
||||
if match:
|
||||
h, m = int(match.group(1)), int(match.group(2))
|
||||
if 0 <= h <= 23 and 0 <= m <= 59:
|
||||
self.add_alarm(h, m)
|
||||
return f"Я установил будильник на {h} часов {m} минут."
|
||||
|
||||
# 2. "7 часов 30 минут" or "7 30"
|
||||
# Search for pattern: digits ... (digits)?
|
||||
# Complex to separate from other numbers.
|
||||
|
||||
# Simple heuristics:
|
||||
words = text.split()
|
||||
nums = [int(s) for s in text.split() if s.isdigit()]
|
||||
|
||||
# "на 7" -> 7:00
|
||||
if "на" in words or "в" in words:
|
||||
# Try to find number after preposition
|
||||
pass
|
||||
|
||||
# Let's rely on explicit digit search if regex failed
|
||||
# Patterns: "на 8", "на 8 30", "на 8 часов 30 минут", "на 8 часов"
|
||||
|
||||
# Regex to capture hour and optional minute
|
||||
# Matches: "на <H> [часов] [M] [минут]"
|
||||
match_time = re.search(r'на\s+(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?', text)
|
||||
|
||||
if match_time:
|
||||
h = int(match_time.group(1))
|
||||
m = int(match_time.group(2)) if match_time.group(2) else 0
|
||||
|
||||
# Handle AM/PM if specified
|
||||
if "вечера" in text and h < 12:
|
||||
h += 12
|
||||
elif "утра" in text and h == 12:
|
||||
h = 0
|
||||
|
||||
if 0 <= h <= 23 and 0 <= m <= 59:
|
||||
self.add_alarm(h, m)
|
||||
return f"Хорошо, разбужу вас в {h}:{m:02d}."
|
||||
|
||||
return "Я не понял время для будильника. Пожалуйста, скажите точное время, например 'семь тридцать'."
|
||||
|
||||
# Global instance
|
||||
_alarm_clock = None
|
||||
|
||||
def get_alarm_clock():
    """Return the shared AlarmClock, creating it on the first call."""
    global _alarm_clock
    if _alarm_clock is not None:
        return _alarm_clock
    _alarm_clock = AlarmClock()
    return _alarm_clock
|
||||
12
alarms.json
Normal file
12
alarms.json
Normal file
@@ -0,0 +1,12 @@
|
||||
[
|
||||
{
|
||||
"hour": 10,
|
||||
"minute": 15,
|
||||
"active": true
|
||||
},
|
||||
{
|
||||
"hour": 3,
|
||||
"minute": 42,
|
||||
"active": false
|
||||
}
|
||||
]
|
||||
217
cleaner.py
217
cleaner.py
@@ -3,6 +3,7 @@ Response cleaner module.
|
||||
Removes markdown formatting and special characters from AI responses.
|
||||
Handles complex number-to-text conversion for Russian language.
|
||||
"""
|
||||
|
||||
import re
|
||||
import pymorphy3
|
||||
from num2words import num2words
|
||||
@@ -12,79 +13,86 @@ morph = pymorphy3.MorphAnalyzer()
|
||||
|
||||
# Preposition to case mapping (simplified heuristics)
|
||||
PREPOSITION_CASES = {
|
||||
'в': 'loct', # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
|
||||
'во': 'loct',
|
||||
'на': 'accs', # Dates: 'na 5 maya' -> Accusative (na pyatoe)
|
||||
'о': 'loct',
|
||||
'об': 'loct',
|
||||
'обо': 'loct',
|
||||
'при': 'loct',
|
||||
'у': 'gent',
|
||||
'от': 'gent',
|
||||
'до': 'gent',
|
||||
'из': 'gent',
|
||||
'с': 'gent', # or ablt (instrumental)
|
||||
'со': 'gent',
|
||||
'без': 'gent',
|
||||
'для': 'gent',
|
||||
'вокруг': 'gent',
|
||||
'после': 'gent',
|
||||
'к': 'datv',
|
||||
'ко': 'datv',
|
||||
'по': 'datv', # or accs for dates (limit). Heuristic: datv defaults usually.
|
||||
'над': 'ablt',
|
||||
'под': 'ablt',
|
||||
'перед': 'ablt',
|
||||
'за': 'ablt', # or acc
|
||||
'между': 'ablt',
|
||||
"в": "loct", # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
|
||||
"во": "loct",
|
||||
"на": "accs", # Dates: 'na 5 maya' -> Accusative (na pyatoe)
|
||||
"о": "loct",
|
||||
"об": "loct",
|
||||
"обо": "loct",
|
||||
"при": "loct",
|
||||
"у": "gent",
|
||||
"от": "gent",
|
||||
"до": "gent",
|
||||
"из": "gent",
|
||||
"с": "gent", # or ablt (instrumental)
|
||||
"со": "gent",
|
||||
"без": "gent",
|
||||
"для": "gent",
|
||||
"вокруг": "gent",
|
||||
"после": "gent",
|
||||
"к": "datv",
|
||||
"ко": "datv",
|
||||
"по": "datv", # or accs for dates (limit). Heuristic: datv defaults usually.
|
||||
"над": "ablt",
|
||||
"под": "ablt",
|
||||
"перед": "ablt",
|
||||
"за": "ablt", # or acc
|
||||
"между": "ablt",
|
||||
}
|
||||
|
||||
# Mapping pymorphy cases to num2words cases
|
||||
PYMORPHY_TO_NUM2WORDS = {
|
||||
'nomn': 'nominative',
|
||||
'gent': 'genitive',
|
||||
'datv': 'dative',
|
||||
'accs': 'accusative',
|
||||
'ablt': 'instrumental',
|
||||
'loct': 'prepositional',
|
||||
'voct': 'nominative', # Fallback
|
||||
'gen2': 'genitive',
|
||||
'acc2': 'accusative',
|
||||
'loc2': 'prepositional',
|
||||
"nomn": "nominative",
|
||||
"gent": "genitive",
|
||||
"datv": "dative",
|
||||
"accs": "accusative",
|
||||
"ablt": "instrumental",
|
||||
"loct": "prepositional",
|
||||
"voct": "nominative", # Fallback
|
||||
"gen2": "genitive",
|
||||
"acc2": "accusative",
|
||||
"loc2": "prepositional",
|
||||
}
|
||||
|
||||
# Month names in Genitive case (as they appear in dates)
|
||||
MONTHS_GENITIVE = [
|
||||
'января', 'февраля', 'марта', 'апреля', 'мая', 'июня',
|
||||
'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'
|
||||
"января",
|
||||
"февраля",
|
||||
"марта",
|
||||
"апреля",
|
||||
"мая",
|
||||
"июня",
|
||||
"июля",
|
||||
"августа",
|
||||
"сентября",
|
||||
"октября",
|
||||
"ноября",
|
||||
"декабря",
|
||||
]
|
||||
|
||||
|
||||
def get_case_from_preposition(prep_token):
    """Look up the pymorphy case tag governed by a preposition.

    The lookup is case-insensitive. Returns None for an empty or missing
    token and for prepositions absent from PREPOSITION_CASES.
    """
    return PREPOSITION_CASES.get(prep_token.lower()) if prep_token else None
|
||||
|
||||
def convert_number(number_str, context_type='cardinal', case='nominative', gender='m'):
|
||||
|
||||
def convert_number(number_str, context_type="cardinal", case="nominative", gender="m"):
|
||||
"""Convert a number string to words with specific parameters."""
|
||||
try:
|
||||
# Handle floats
|
||||
if '.' in number_str or ',' in number_str:
|
||||
num_val = float(number_str.replace(',', '.'))
|
||||
if "." in number_str or "," in number_str:
|
||||
num_val = float(number_str.replace(",", "."))
|
||||
else:
|
||||
num_val = int(number_str)
|
||||
|
||||
return num2words(
|
||||
num_val,
|
||||
lang='ru',
|
||||
to=context_type,
|
||||
case=case,
|
||||
gender=gender
|
||||
)
|
||||
return num2words(num_val, lang="ru", to=context_type, case=case, gender=gender)
|
||||
except Exception as e:
|
||||
print(f"Error converting number {number_str}: {e}")
|
||||
return number_str
|
||||
|
||||
|
||||
def numbers_to_words(text: str) -> str:
|
||||
"""
|
||||
Intelligent conversion of digits in text to Russian words.
|
||||
@@ -96,33 +104,39 @@ def numbers_to_words(text: str) -> str:
|
||||
# 1. Identify "Year" patterns: "1999 год", "в 2024 году"
|
||||
def replace_year_match(match):
|
||||
full_str = match.group(0)
|
||||
prep = match.group(1) # Could be None
|
||||
prep = match.group(1) # Could be None
|
||||
year_str = match.group(2)
|
||||
year_word = match.group(3) # год, году, года...
|
||||
year_word = match.group(3) # год, году, года...
|
||||
|
||||
parsed = morph.parse(year_word)[0]
|
||||
case_tag = parsed.tag.case
|
||||
|
||||
if prep and prep.strip().lower() in ['в', 'во'] and case_tag in ['accs', 'nomn']:
|
||||
pass
|
||||
if (
|
||||
prep
|
||||
and prep.strip().lower() in ["в", "во"]
|
||||
and case_tag in ["accs", "nomn"]
|
||||
):
|
||||
pass
|
||||
|
||||
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative')
|
||||
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, "nominative")
|
||||
|
||||
words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m')
|
||||
words = convert_number(
|
||||
year_str, context_type="ordinal", case=nw_case, gender="m"
|
||||
)
|
||||
|
||||
prefix = f"{prep} " if prep else ""
|
||||
return f"{prefix}{words} {year_word}"
|
||||
|
||||
text = re.sub(
|
||||
r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b',
|
||||
r"(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b",
|
||||
replace_year_match,
|
||||
text
|
||||
text,
|
||||
)
|
||||
|
||||
# 2. Identify "Date" patterns: "25 июня", "с 1 мая"
|
||||
# Matches: (Preposition)? (Day) (Month_Genitive)
|
||||
# Day is usually 1-31.
|
||||
month_regex = '|'.join(MONTHS_GENITIVE)
|
||||
month_regex = "|".join(MONTHS_GENITIVE)
|
||||
|
||||
def replace_date_match(match):
|
||||
prep = match.group(1)
|
||||
@@ -131,24 +145,24 @@ def numbers_to_words(text: str) -> str:
|
||||
|
||||
# Determine case
|
||||
# Default to Genitive ("25 июня" -> "двадцать пятого июня")
|
||||
case = 'genitive'
|
||||
case = "genitive"
|
||||
|
||||
if prep:
|
||||
prep_clean = prep.strip().lower()
|
||||
# Specific overrides for dates
|
||||
if prep_clean == 'на':
|
||||
case = 'accusative' # на 5 мая -> на пятое
|
||||
elif prep_clean == 'по':
|
||||
case = 'accusative' # по 5 мая -> по пятое (limit)
|
||||
elif prep_clean == 'к':
|
||||
case = 'dative' # к 5 мая -> к пятому
|
||||
elif prep_clean in ['с', 'до', 'от']:
|
||||
case = 'genitive' # с 5 мая -> с пятого
|
||||
if prep_clean == "на":
|
||||
case = "accusative" # на 5 мая -> на пятое
|
||||
elif prep_clean == "по":
|
||||
case = "accusative" # по 5 мая -> по пятое (limit)
|
||||
elif prep_clean == "к":
|
||||
case = "dative" # к 5 мая -> к пятому
|
||||
elif prep_clean in ["с", "до", "от"]:
|
||||
case = "genitive" # с 5 мая -> с пятого
|
||||
else:
|
||||
# Fallback to general preposition map
|
||||
morph_case = get_case_from_preposition(prep_clean)
|
||||
if morph_case:
|
||||
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive')
|
||||
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "genitive")
|
||||
|
||||
# Convert to Ordinal
|
||||
# Dates are neuter ("число" implies neuter: "пятое", "пятого")
|
||||
@@ -157,15 +171,15 @@ def numbers_to_words(text: str) -> str:
|
||||
# 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?)
|
||||
# Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому).
|
||||
|
||||
words = convert_number(day_str, context_type='ordinal', case=case, gender='n')
|
||||
words = convert_number(day_str, context_type="ordinal", case=case, gender="n")
|
||||
|
||||
prefix = f"{prep} " if prep else ""
|
||||
return f"{prefix}{words} {month_word}"
|
||||
|
||||
text = re.sub(
|
||||
r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b',
|
||||
r"(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(" + month_regex + r")\b",
|
||||
replace_date_match,
|
||||
text
|
||||
text,
|
||||
)
|
||||
|
||||
# 3. Handle remaining numbers (Cardinals)
|
||||
@@ -173,32 +187,33 @@ def numbers_to_words(text: str) -> str:
|
||||
prep = match.group(1)
|
||||
num_str = match.group(2)
|
||||
|
||||
case = 'nominative'
|
||||
case = "nominative"
|
||||
if prep:
|
||||
morph_case = get_case_from_preposition(prep.strip())
|
||||
if morph_case:
|
||||
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative')
|
||||
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "nominative")
|
||||
|
||||
words = convert_number(num_str, context_type='cardinal', case=case)
|
||||
words = convert_number(num_str, context_type="cardinal", case=case)
|
||||
|
||||
prefix = f"{prep} " if prep else ""
|
||||
return f"{prefix}{words}"
|
||||
|
||||
text = re.sub(
|
||||
r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b',
|
||||
r"(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b",
|
||||
replace_cardinal_match,
|
||||
text
|
||||
text,
|
||||
)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def clean_response(text: str) -> str:
|
||||
def clean_response(text: str, language: str = "ru") -> str:
|
||||
"""
|
||||
Clean AI response from markdown formatting and special characters.
|
||||
|
||||
Args:
|
||||
text: Raw AI response with possible markdown
|
||||
language: Target language for output (affects post-processing)
|
||||
|
||||
Returns:
|
||||
Clean text suitable for TTS
|
||||
@@ -208,58 +223,64 @@ def clean_response(text: str) -> str:
|
||||
|
||||
# Remove citation references like [1], [2], [citation], etc.
|
||||
# Using hex escapes for brackets to avoid escaping issues
|
||||
text = re.sub(r'\x5B\d+\x5D', '', text)
|
||||
text = re.sub(r'\x5Bcitation\s*needed\x5D', '', text, flags=re.IGNORECASE)
|
||||
text = re.sub(r'\x5Bsource\x5D', '', text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"\x5B\d+\x5D", "", text)
|
||||
text = re.sub(r"\x5Bcitation\s*needed\x5D", "", text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"\x5Bsource\x5D", "", text, flags=re.IGNORECASE)
|
||||
|
||||
# Remove markdown bold **text** and __text__
|
||||
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
||||
text = re.sub(r'__(.+?)__', r'\1', text)
|
||||
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
|
||||
text = re.sub(r"__(.+?)__", r"\1", text)
|
||||
|
||||
# Remove markdown italic *text* and _text_
|
||||
text = re.sub(r'\*(.+?)\*', r'\1', text)
|
||||
text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)
|
||||
text = re.sub(r"\*(.+?)\*", r"\1", text)
|
||||
text = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"\1", text)
|
||||
|
||||
# Remove markdown strikethrough ~~text~~
|
||||
text = re.sub(r'~~(.+?)~~', r'\1', text)
|
||||
text = re.sub(r"~~(.+?)~~", r"\1", text)
|
||||
|
||||
# Remove markdown headers # ## ### etc.
|
||||
text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r"^#{1,6}\s*", "", text, flags=re.MULTILINE)
|
||||
|
||||
# Remove markdown links [text](url) -> text
|
||||
text = re.sub(r'\x5B([^\x5D]+)\x5D\([^)]+\)', r'\1', text)
|
||||
text = re.sub(r"\x5B([^\x5D]+)\x5D\([^)]+\)", r"\1", text)
|
||||
|
||||
# Remove markdown images 
|
||||
text = re.sub(r'!\x5B([^\x5D]*)\x5D\([^)]+\)', '', text)
|
||||
text = re.sub(r"!\x5B([^\x5D]*)\x5D\([^)]+\)", "", text)
|
||||
|
||||
# Remove inline code `code`
|
||||
text = re.sub(r'`([^`]+)`', r'\1', text)
|
||||
text = re.sub(r"`([^`]+)`", r"\1", text)
|
||||
|
||||
# Remove code blocks ```code```
|
||||
text = re.sub(r'```[\s\S]*?```', '', text)
|
||||
text = re.sub(r"```[\s\S]*?```", "", text)
|
||||
|
||||
# Remove markdown list markers (-, *, +, numbered)
|
||||
text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE)
|
||||
text = re.sub(r"^\s*\d+\.\s+", "", text, flags=re.MULTILINE)
|
||||
|
||||
# Remove blockquotes
|
||||
text = re.sub(r'^\s*>\s*', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r"^\s*>\s*", "", text, flags=re.MULTILINE)
|
||||
|
||||
# Remove horizontal rules
|
||||
text = re.sub(r'^[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
|
||||
|
||||
# Remove HTML tags if any
|
||||
text = re.sub(r'<[^>]+>', '', text)
|
||||
text = re.sub(r"<[^>]+>", "", text)
|
||||
|
||||
# Remove informal slang greetings at the beginning of sentences/responses
|
||||
text = re.sub(r'^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)[,!?:]?\s*', '', text, flags=re.IGNORECASE | re.MULTILINE)
|
||||
text = re.sub(
|
||||
r"^(Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)[,!?:]?\s*",
|
||||
"",
|
||||
text,
|
||||
flags=re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
|
||||
# Convert numbers to words (Russian)
|
||||
text = numbers_to_words(text)
|
||||
# Convert numbers to words only for Russian, and only if digits exist
|
||||
if language == "ru" and re.search(r"\d", text):
|
||||
text = numbers_to_words(text)
|
||||
|
||||
# Remove extra whitespace
|
||||
text = re.sub(r'\n{3,}', '\n\n', text)
|
||||
text = re.sub(r' +', ' ', text)
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
text = re.sub(r" +", " ", text)
|
||||
|
||||
# Clean up and return
|
||||
text = text.strip()
|
||||
|
||||
10
config.py
10
config.py
@@ -2,6 +2,7 @@
|
||||
Configuration module for smart speaker.
|
||||
Loads environment variables from .env file.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
@@ -31,6 +32,13 @@ VOSK_MODEL_PATH = BASE_DIR / "vosk-model-ru-0.42"
|
||||
SAMPLE_RATE = 16000
|
||||
CHANNELS = 1
|
||||
|
||||
# Set timezone to Moscow for local-time functions in this process.
import time

os.environ["TZ"] = "Europe/Moscow"
# time.tzset() exists only on Unix; skip it elsewhere instead of failing
# with AttributeError while this module is being imported.
if hasattr(time, "tzset"):
    time.tzset()
|
||||
|
||||
# TTS configuration
|
||||
TTS_SPEAKER = "eugene" # Available: aidar, baya, kseniya, xenia, eugene
|
||||
TTS_SPEAKER = "eugene" # Available (ru): aidar, baya, kseniya, xenia, eugene
|
||||
TTS_EN_SPEAKER = os.getenv("TTS_EN_SPEAKER", "en_0")
|
||||
TTS_SAMPLE_RATE = 48000
|
||||
|
||||
116
local_stt.py
Normal file
116
local_stt.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
Local offline Speech-to-Text module using Vosk.
|
||||
Used for simple command detection (like "stop") without internet.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pyaudio
|
||||
from vosk import Model, KaldiRecognizer
|
||||
from config import VOSK_MODEL_PATH, SAMPLE_RATE
|
||||
|
||||
class LocalRecognizer:
    """Offline keyword listener built on a Vosk model and a PyAudio input stream."""

    def __init__(self):
        self.model = None
        self.rec = None
        self.pa = None
        self.stream = None

    def _load_model(self) -> bool:
        """Load the Vosk model with stderr silenced; return True on success."""
        if not os.path.exists(VOSK_MODEL_PATH):
            print(f"❌ Ошибка: Vosk модель не найдена по пути {VOSK_MODEL_PATH}")
            return False

        print("📦 Инициализация локального STT (Vosk)...")
        try:
            sys.stderr.flush()
            old_stderr = os.dup(2)
            try:
                # Point fd 2 at /dev/null to suppress Vosk logs.
                null_fd = os.open(os.devnull, os.O_WRONLY)
                try:
                    os.dup2(null_fd, 2)
                finally:
                    os.close(null_fd)
                self.model = Model(str(VOSK_MODEL_PATH))
            finally:
                # Always restore stderr, even when the model fails to load;
                # otherwise later error output would be discarded.
                os.dup2(old_stderr, 2)
                os.close(old_stderr)
        except Exception as e:
            print(f"Error initializing Vosk: {e}")
            return False
        return True

    def initialize(self):
        """Create the model, recognizer and audio backend that are still missing.

        Returns:
            True when everything is ready, False when the model path does not
            exist or the model could not be loaded.
        """
        if self.model is None and not self._load_model():
            return False
        if self.rec is None:
            self.rec = KaldiRecognizer(self.model, SAMPLE_RATE)
        if self.pa is None:
            self.pa = pyaudio.PyAudio()
        return True

    def listen_for_keywords(self, keywords: list, timeout: float = 10.0) -> str:
        """
        Listen for specific keywords locally.

        Args:
            keywords: Substrings to look for in the recognized text.
            timeout: Maximum listening time in seconds.

        Returns:
            The recognized (final or partial) text that contains a keyword,
            or an empty string on timeout, initialization failure or
            microphone error.
        """
        import time

        if self.model is None or self.rec is None or self.pa is None:
            if not self.initialize():
                return ""

        # Open stream
        stream = None
        try:
            stream = self.pa.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=SAMPLE_RATE,
                input=True,
                frames_per_buffer=4096,
            )
            stream.start_stream()
        except Exception as e:
            if stream is not None:
                stream.close()
            print(f"❌ Ошибка микрофона: {e}")
            return ""

        # Drop audio state left over from a previous call, so an old partial
        # result containing a keyword cannot be reported again immediately.
        self.rec.Reset()

        print(f"👂 Локальное слушание ожидает: {keywords}")

        deadline = time.monotonic() + timeout
        detected_text = ""

        try:
            while not detected_text and time.monotonic() < deadline:
                data = stream.read(4096, exception_on_overflow=False)
                if self.rec.AcceptWaveform(data):
                    # A complete utterance was recognized.
                    heard = json.loads(self.rec.Result()).get("text", "")
                    if heard:
                        print(f"📝 Локально: {heard}")
                else:
                    # Partial result: checked too, so a keyword is caught
                    # before the utterance ends.
                    heard = json.loads(self.rec.PartialResult()).get("partial", "")

                if heard and any(kw in heard for kw in keywords):
                    detected_text = heard
        finally:
            stream.stop_stream()
            stream.close()

        return detected_text

    def cleanup(self):
        """Release the PyAudio backend; a later listen call re-creates it."""
        if self.pa:
            self.pa.terminate()
            self.pa = None
|
||||
|
||||
# Global instance
|
||||
_local_recognizer = None
|
||||
|
||||
def get_local_recognizer():
    """Return the shared LocalRecognizer, creating it on the first call."""
    global _local_recognizer
    if _local_recognizer is not None:
        return _local_recognizer
    _local_recognizer = LocalRecognizer()
    return _local_recognizer
|
||||
|
||||
def listen_for_keywords(keywords: list, timeout: float = 5.0) -> str:
    """Listen for *keywords* through the shared local recognizer.

    Returns whatever the recognizer's listen_for_keywords method returns for
    the given keywords and timeout.
    """
    recognizer = get_local_recognizer()
    return recognizer.listen_for_keywords(keywords, timeout)
|
||||
167
main.py
167
main.py
@@ -13,14 +13,22 @@ Flow:
|
||||
|
||||
import signal
|
||||
import sys
|
||||
import re
|
||||
import threading
|
||||
from collections import deque
|
||||
|
||||
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
||||
from wakeword import (
|
||||
wait_for_wakeword,
|
||||
cleanup as cleanup_wakeword,
|
||||
check_wakeword_once,
|
||||
stop_monitoring as stop_wakeword_monitoring,
|
||||
)
|
||||
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
||||
from ai import ask_ai
|
||||
from ai import ask_ai, translate_text
|
||||
from cleaner import clean_response
|
||||
from tts import speak, initialize as init_tts
|
||||
from sound_level import set_volume, parse_volume_text
|
||||
from alarm import get_alarm_clock
|
||||
|
||||
|
||||
def signal_handler(sig, frame):
|
||||
@@ -31,6 +39,37 @@ def signal_handler(sig, frame):
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
# (command prefix regex, source language, target language).
# The optional "язык"/"языка" word and the optional "с ... на ..." /
# "from ... to ..." tail belong to the command, not to the text to translate.
_TRANSLATION_COMMANDS = (
    (r"переведи\s+на\s+английский(?:\s+язык)?", "ru", "en"),
    (r"переведи\s+на\s+русский(?:\s+язык)?", "en", "ru"),
    (
        r"переведи\s+с\s+английского(?:\s+языка)?"
        r"(?:\s+на\s+русский(?:\s+язык)?)?",
        "en",
        "ru",
    ),
    (
        r"переведи\s+с\s+русского(?:\s+языка)?"
        r"(?:\s+на\s+английский(?:\s+язык)?)?",
        "ru",
        "en",
    ),
    (r"как\s+по[-\s]?английски", "ru", "en"),
    (r"как\s+по[-\s]?русски", "en", "ru"),
    (r"translate\s+(?:to|into)\s+english", "ru", "en"),
    (r"translate\s+(?:to|into)\s+russian", "en", "ru"),
    (r"translate\s+from\s+english(?:\s+(?:to|into)\s+russian)?", "en", "ru"),
    (r"translate\s+from\s+russian(?:\s+(?:to|into)\s+english)?", "ru", "en"),
)

# Compiled once at import. After the command, separator punctuation and
# whitespace are skipped and the remainder is captured as the text.
_TRANSLATION_PATTERNS = tuple(
    (re.compile(prefix + r"\b[\s,:]*(.*)$", re.IGNORECASE), source, target)
    for prefix, source, target in _TRANSLATION_COMMANDS
)


def parse_translation_request(text: str):
    """
    Detect translation commands and extract language direction and text.

    The command must open the utterance; surrounding whitespace is ignored
    and matching is case-insensitive.

    Args:
        text: Recognized user utterance.

    Returns:
        dict with source_lang, target_lang and text (possibly empty when the
        user gave only the command), or None if no translation command is
        found.
    """
    command = text.strip()
    for pattern, source_lang, target_lang in _TRANSLATION_PATTERNS:
        match = pattern.match(command)
        if match:
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": match.group(1).strip(),
            }
    return None
|
||||
|
||||
|
||||
def main():
|
||||
"""Main application loop."""
|
||||
print("=" * 50)
|
||||
@@ -46,8 +85,31 @@ def main():
|
||||
|
||||
# Pre-initialize models (takes a few seconds)
|
||||
print("⏳ Инициализация моделей...")
|
||||
get_recognizer().initialize() # Initialize STT model first
|
||||
init_tts() # Then initialize TTS model
|
||||
init_errors = []
|
||||
|
||||
def init_stt():
|
||||
try:
|
||||
get_recognizer().initialize()
|
||||
except Exception as e:
|
||||
init_errors.append(e)
|
||||
|
||||
def init_tts_model():
|
||||
try:
|
||||
init_tts()
|
||||
except Exception as e:
|
||||
init_errors.append(e)
|
||||
|
||||
stt_thread = threading.Thread(target=init_stt, daemon=True)
|
||||
tts_thread = threading.Thread(target=init_tts_model, daemon=True)
|
||||
stt_thread.start()
|
||||
tts_thread.start()
|
||||
stt_thread.join()
|
||||
tts_thread.join()
|
||||
|
||||
if init_errors:
|
||||
raise init_errors[0]
|
||||
|
||||
alarm_clock = get_alarm_clock() # Initialize Alarm Clock
|
||||
print()
|
||||
|
||||
# Initialize chat history (last 10 exchanges = 20 messages)
|
||||
@@ -57,37 +119,58 @@ def main():
|
||||
skip_wakeword = False
|
||||
while True:
|
||||
try:
|
||||
# Ensure wake word detector stream is closed before listening
|
||||
stop_wakeword_monitoring()
|
||||
|
||||
# Check for alarms every loop iteration
|
||||
if alarm_clock.check_alarms():
|
||||
# If alarm triggered and finished (user stopped it), we continue loop
|
||||
# The alarm.trigger_alarm() blocks until stopped.
|
||||
skip_wakeword = False # Reset state after alarm
|
||||
continue
|
||||
|
||||
# Step 1: Wait for wake word or Follow-up listen
|
||||
if not skip_wakeword:
|
||||
wait_for_wakeword()
|
||||
# Wait with timeout to allow alarm checking
|
||||
detected = wait_for_wakeword(timeout=1.0)
|
||||
|
||||
# If timeout (not detected), loop again to check alarms
|
||||
if not detected:
|
||||
continue
|
||||
|
||||
# Standard listen after activation
|
||||
user_text = listen(timeout_seconds=7.0)
|
||||
else:
|
||||
# Follow-up listen (wait 2.0s for start, then listen long)
|
||||
print("👂 Слушаю продолжение диалога...")
|
||||
user_text = listen(timeout_seconds=20.0, detection_timeout=2.0)
|
||||
# Follow-up listen (wait 5.0s for start)
|
||||
print("👂 Слушаю продолжение диалога (5 сек)...")
|
||||
user_text = listen(timeout_seconds=10.0, detection_timeout=5.0)
|
||||
|
||||
if not user_text:
|
||||
# User didn't continue conversation, go back to sleep
|
||||
# User didn't continue conversation, go back to sleep silently
|
||||
skip_wakeword = False
|
||||
continue
|
||||
|
||||
# Reset flag for now (will be set to True if we speak successfully)
|
||||
skip_wakeword = False
|
||||
|
||||
# Step 2: Check if speech was recognized
|
||||
if not user_text:
|
||||
# If this was a direct wake word activation but no speech
|
||||
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
||||
skip_wakeword = False # Reset to wake word
|
||||
continue
|
||||
|
||||
# Check for stop commands
|
||||
user_text_lower = user_text.lower().strip()
|
||||
if user_text_lower in ["стоп", "александр", "стоп александр"]:
|
||||
if user_text_lower in ["стоп", "александр", "стоп александр", "хватит"]:
|
||||
print("_" * 50)
|
||||
print("💤 Жду 'Alexandr' для активации...")
|
||||
skip_wakeword = False
|
||||
continue
|
||||
|
||||
# Check for alarm commands
|
||||
alarm_response = alarm_clock.parse_command(user_text)
|
||||
if alarm_response:
|
||||
speak(alarm_response)
|
||||
continue
|
||||
|
||||
# Check for volume command
|
||||
if user_text.lower().startswith("громкость"):
|
||||
try:
|
||||
@@ -113,6 +196,46 @@ def main():
|
||||
speak("Не удалось изменить громкость.")
|
||||
continue
|
||||
|
||||
# Check for translation commands
|
||||
translation_request = parse_translation_request(user_text)
|
||||
if translation_request:
|
||||
source_lang = translation_request["source_lang"]
|
||||
target_lang = translation_request["target_lang"]
|
||||
text_to_translate = translation_request["text"]
|
||||
|
||||
if not text_to_translate:
|
||||
prompt = (
|
||||
"Скажи фразу на английском."
|
||||
if source_lang == "en"
|
||||
else "Скажи фразу на русском."
|
||||
)
|
||||
speak(prompt)
|
||||
text_to_translate = listen(
|
||||
timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
|
||||
)
|
||||
|
||||
if not text_to_translate:
|
||||
speak("Я не расслышал текст для перевода.")
|
||||
skip_wakeword = False
|
||||
continue
|
||||
|
||||
translated_text = translate_text(
|
||||
text_to_translate, source_lang, target_lang
|
||||
)
|
||||
clean_text = clean_response(translated_text, language=target_lang)
|
||||
|
||||
completed = speak(
|
||||
clean_text,
|
||||
check_interrupt=check_wakeword_once,
|
||||
language=target_lang,
|
||||
)
|
||||
stop_wakeword_monitoring()
|
||||
skip_wakeword = True
|
||||
|
||||
if not completed:
|
||||
print("⏹️ Перевод прерван - слушаю следующий вопрос")
|
||||
continue
|
||||
|
||||
# Step 3: Send to AI
|
||||
# Add user message to history
|
||||
chat_history.append({"role": "user", "content": user_text})
|
||||
@@ -124,10 +247,16 @@ def main():
|
||||
chat_history.append({"role": "assistant", "content": ai_response})
|
||||
|
||||
# Step 4: Clean response
|
||||
clean_text = clean_response(ai_response)
|
||||
clean_text = clean_response(ai_response, language="ru")
|
||||
|
||||
# Step 5: Speak response (with wake word interrupt support)
|
||||
completed = speak(clean_text, check_interrupt=check_wakeword_once)
|
||||
# This uses check_wakeword_once which opens/closes stream as needed
|
||||
completed = speak(
|
||||
clean_text, check_interrupt=check_wakeword_once, language="ru"
|
||||
)
|
||||
|
||||
# Stop monitoring after TTS finishes (cleanup stream opened by check_wakeword_once)
|
||||
stop_wakeword_monitoring()
|
||||
|
||||
# Enable follow-up mode for next iteration
|
||||
skip_wakeword = True
|
||||
@@ -136,7 +265,12 @@ def main():
|
||||
# but we can print a message
|
||||
if not completed:
|
||||
print("⏹️ Ответ прерван - слушаю следующий вопрос")
|
||||
continue
|
||||
# If interrupted, we treat it as immediate follow up?
|
||||
# Usually interruption means "I have a new command"
|
||||
# So skip_wakeword = True is correct.
|
||||
# But we might want to listen IMMEDIATELY without waiting 5s for start?
|
||||
# listen() handles that.
|
||||
pass
|
||||
|
||||
print()
|
||||
print("-" * 30)
|
||||
@@ -149,6 +283,7 @@ def main():
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка: {e}")
|
||||
speak("Произошла ошибка. Попробуйте ещё раз.")
|
||||
skip_wakeword = False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
30
stt.py
30
stt.py
@@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API.
|
||||
Recognizes speech from microphone using streaming WebSocket.
|
||||
Supports Russian (default) and English.
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import threading
|
||||
@@ -20,6 +21,7 @@ from deepgram import (
|
||||
# Configure logging to suppress debug noise
|
||||
logging.getLogger("deepgram").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
class SpeechRecognizer:
|
||||
"""Speech recognizer using Deepgram streaming."""
|
||||
|
||||
@@ -59,6 +61,7 @@ class SpeechRecognizer:
|
||||
async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
|
||||
"""Async loop to send audio and wait for results."""
|
||||
self.transcript = ""
|
||||
transcript_parts = []
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
stream = self._get_stream()
|
||||
@@ -74,9 +77,11 @@ class SpeechRecognizer:
|
||||
if len(sentence) == 0:
|
||||
return
|
||||
if result.is_final:
|
||||
print(f"📝 Частичный результат: {sentence}")
|
||||
with speech_recognizer_self.lock:
|
||||
speech_recognizer_self.transcript = sentence
|
||||
transcript_parts.append(sentence)
|
||||
speech_recognizer_self.transcript = " ".join(
|
||||
transcript_parts
|
||||
).strip()
|
||||
|
||||
def on_speech_started(unused_self, speech_started, **kwargs):
|
||||
loop.call_soon_threadsafe(speech_started_event.set)
|
||||
@@ -102,7 +107,7 @@ class SpeechRecognizer:
|
||||
channels=1,
|
||||
sample_rate=SAMPLE_RATE,
|
||||
interim_results=True,
|
||||
utterance_end_ms="1200",
|
||||
utterance_end_ms=1200,
|
||||
vad_events=True,
|
||||
)
|
||||
|
||||
@@ -138,7 +143,9 @@ class SpeechRecognizer:
|
||||
# 1. Wait for speech to start (detection_timeout)
|
||||
if detection_timeout:
|
||||
try:
|
||||
await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout)
|
||||
await asyncio.wait_for(
|
||||
speech_started_event.wait(), timeout=detection_timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
# print("Detection timeout - no speech")
|
||||
stop_event.set()
|
||||
@@ -158,7 +165,12 @@ class SpeechRecognizer:
|
||||
|
||||
return self.transcript
|
||||
|
||||
def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
|
||||
def listen(
|
||||
self,
|
||||
timeout_seconds: float = 7.0,
|
||||
detection_timeout: float = None,
|
||||
lang: str = "ru",
|
||||
) -> str:
|
||||
"""
|
||||
Listen to microphone and transcribe speech.
|
||||
"""
|
||||
@@ -172,7 +184,9 @@ class SpeechRecognizer:
|
||||
dg_connection = self.dg_client.listen.live.v("1")
|
||||
|
||||
try:
|
||||
transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout))
|
||||
transcript = asyncio.run(
|
||||
self._process_audio(dg_connection, timeout_seconds, detection_timeout)
|
||||
)
|
||||
|
||||
final_text = transcript.strip() if transcript else ""
|
||||
if final_text:
|
||||
@@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer:
|
||||
return _recognizer
|
||||
|
||||
|
||||
def listen(
    timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru"
) -> str:
    """Transcribe speech from the microphone using the shared recognizer.

    Thin module-level convenience wrapper: all three arguments are passed
    positionally, unchanged, to ``SpeechRecognizer.listen``.

    Args:
        timeout_seconds: Forwarded as the recognizer's ``timeout_seconds``.
        detection_timeout: Forwarded as the recognizer's ``detection_timeout``.
        lang: Language code forwarded to the recognizer (default ``"ru"``).

    Returns:
        Whatever the recognizer's ``listen`` returns (the transcribed text).
    """
    recognizer = get_recognizer()
    return recognizer.listen(timeout_seconds, detection_timeout, lang)
|
||||
|
||||
|
||||
70
tts.py
70
tts.py
@@ -11,7 +11,7 @@ import threading
|
||||
import time
|
||||
import warnings
|
||||
import re
|
||||
from config import TTS_SPEAKER, TTS_SAMPLE_RATE
|
||||
from config import TTS_SPEAKER, TTS_EN_SPEAKER, TTS_SAMPLE_RATE
|
||||
|
||||
# Suppress Silero TTS warning about text length
|
||||
warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols")
|
||||
@@ -21,27 +21,55 @@ class TextToSpeech:
|
||||
"""Text-to-Speech using Silero TTS with wake word interruption support."""
|
||||
|
||||
def __init__(self):
    """Set up per-language state; no model is loaded here."""
    # Output sample rate passed to the model when synthesizing.
    self.sample_rate = TTS_SAMPLE_RATE
    # Configured voice name for each supported language code.
    self.speakers = dict(ru=TTS_SPEAKER, en=TTS_EN_SPEAKER)
    # Cache of loaded models keyed by language code (filled by _load_model).
    self.models = {}
    self._stop_flag = threading.Event()
    self._interrupted = False
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize Silero TTS model."""
|
||||
print("📦 Загрузка модели Silero TTS v5...")
|
||||
def _load_model(self, language: str):
|
||||
"""Load and cache Silero TTS model for the given language."""
|
||||
if language in self.models:
|
||||
return self.models[language]
|
||||
|
||||
# Load Silero TTS model
|
||||
device = torch.device('cpu')
|
||||
self.model, _ = torch.hub.load(
|
||||
model_config = {
|
||||
"ru": {"language": "ru", "model_id": "v5_ru"},
|
||||
"en": {"language": "en", "model_id": "v3_en"},
|
||||
}
|
||||
|
||||
if language not in model_config:
|
||||
raise ValueError(f"Unsupported TTS language: {language}")
|
||||
|
||||
config = model_config[language]
|
||||
print(f"📦 Загрузка модели Silero TTS ({language})...")
|
||||
|
||||
device = torch.device("cpu")
|
||||
model, _ = torch.hub.load(
|
||||
repo_or_dir="snakers4/silero-models",
|
||||
model="silero_tts",
|
||||
language="ru",
|
||||
speaker="v5_ru",
|
||||
language=config["language"],
|
||||
speaker=config["model_id"],
|
||||
)
|
||||
self.model.to(device)
|
||||
model.to(device)
|
||||
|
||||
print(f"✅ Модель TTS v5 загружена (голос: {self.speaker})")
|
||||
self.models[language] = model
|
||||
return model
|
||||
|
||||
def _get_speaker(self, language: str, model) -> str:
|
||||
"""Return a valid speaker for the loaded model."""
|
||||
speaker = self.speakers.get(language)
|
||||
if hasattr(model, "speakers") and speaker not in model.speakers:
|
||||
fallback = model.speakers[0] if model.speakers else speaker
|
||||
print(f"⚠️ Голос '{speaker}' недоступен, использую '{fallback}'")
|
||||
return fallback
|
||||
return speaker
|
||||
|
||||
def initialize(self):
    """Load the default (Russian) TTS model through ``_load_model``."""
    default_language = "ru"
    self._load_model(default_language)
|
||||
|
||||
def _split_text(self, text: str, max_length: int = 900) -> list[str]:
|
||||
"""Split text into chunks smaller than max_length."""
|
||||
@@ -83,13 +111,14 @@ class TextToSpeech:
|
||||
# Filter empty chunks
|
||||
return [c for c in chunks if c]
|
||||
|
||||
def speak(self, text: str, check_interrupt=None) -> bool:
|
||||
def speak(self, text: str, check_interrupt=None, language: str = "ru") -> bool:
|
||||
"""
|
||||
Convert text to speech and play it.
|
||||
|
||||
Args:
|
||||
text: Text to synthesize and speak
|
||||
check_interrupt: Optional callback function that returns True if playback should stop
|
||||
language: Language code for voice selection ("ru" or "en")
|
||||
|
||||
Returns:
|
||||
True if playback completed normally, False if interrupted
|
||||
@@ -97,8 +126,8 @@ class TextToSpeech:
|
||||
if not text.strip():
|
||||
return True
|
||||
|
||||
if not self.model:
|
||||
self.initialize()
|
||||
model = self._load_model(language)
|
||||
speaker = self._get_speaker(language, model)
|
||||
|
||||
# Split text into manageable chunks
|
||||
chunks = self._split_text(text)
|
||||
@@ -120,8 +149,8 @@ class TextToSpeech:
|
||||
|
||||
try:
|
||||
# Generate audio for chunk
|
||||
audio = self.model.apply_tts(
|
||||
text=chunk, speaker=self.speaker, sample_rate=self.sample_rate
|
||||
audio = model.apply_tts(
|
||||
text=chunk, speaker=speaker, sample_rate=self.sample_rate
|
||||
)
|
||||
|
||||
# Convert to numpy array
|
||||
@@ -218,18 +247,19 @@ def get_tts() -> TextToSpeech:
|
||||
return _tts
|
||||
|
||||
|
||||
def speak(text: str, check_interrupt=None, language: str = "ru") -> bool:
    """Synthesize and play ``text`` using the shared TTS instance.

    Args:
        text: Text to speak.
        check_interrupt: Optional callback used for interrupt checking.
        language: Language code for voice selection ("ru" or "en").

    Returns:
        True if playback completed normally, False if it was interrupted.
    """
    engine = get_tts()
    return engine.speak(text, check_interrupt, language)
|
||||
|
||||
|
||||
def was_interrupted() -> bool:
|
||||
|
||||
90
wakeword.py
90
wakeword.py
@@ -15,6 +15,7 @@ class WakeWordDetector:
|
||||
self.porcupine = None
|
||||
self.audio_stream = None
|
||||
self.pa = None
|
||||
self._stream_closed = True # Track state explicitly
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize Porcupine and audio stream."""
|
||||
@@ -24,6 +25,19 @@ class WakeWordDetector:
|
||||
)
|
||||
|
||||
self.pa = pyaudio.PyAudio()
|
||||
self._open_stream()
|
||||
print("🎤 Ожидание wake word 'Alexandr'...")
|
||||
|
||||
def _open_stream(self):
|
||||
"""Open the audio stream."""
|
||||
if self.audio_stream and not self._stream_closed:
|
||||
return
|
||||
|
||||
if self.audio_stream:
|
||||
try:
|
||||
self.audio_stream.close()
|
||||
except: pass
|
||||
|
||||
self.audio_stream = self.pa.open(
|
||||
rate=self.porcupine.sample_rate,
|
||||
channels=1,
|
||||
@@ -31,44 +45,47 @@ class WakeWordDetector:
|
||||
input=True,
|
||||
frames_per_buffer=self.porcupine.frame_length
|
||||
)
|
||||
print("🎤 Ожидание wake word 'Alexandr'...")
|
||||
self._stream_closed = False
|
||||
|
||||
def wait_for_wakeword(self) -> bool:
|
||||
def stop_monitoring(self):
    """Stop and close the wake-word audio stream, if one is open.

    Does nothing when there is no stream or it is already marked closed,
    so it is safe to call repeatedly. Errors raised by the audio backend
    are reported and otherwise ignored: this is best-effort cleanup whose
    job is to release the microphone.
    """
    stream = self.audio_stream
    if not stream or self._stream_closed:
        return

    # Run the two steps independently so that a failing stop_stream()
    # cannot prevent close() from releasing the device.
    for release_step in (stream.stop_stream, stream.close):
        try:
            release_step()
        except Exception as exc:  # not bare: let KeyboardInterrupt/SystemExit through
            print(f"⚠️ Ошибка при закрытии аудиопотока: {exc}")

    self._stream_closed = True
|
||||
|
||||
def wait_for_wakeword(self, timeout: float = None) -> bool:
    """Block until the wake word is detected or the timeout expires.

    Args:
        timeout: Maximum number of seconds to wait. ``None`` waits
            indefinitely; ``0`` returns ``False`` right away without
            reading any audio.

    Returns:
        True if the wake word was detected, False if the timeout expired.
    """
    import time

    if not self.porcupine:
        self.initialize()

    # Ensure stream is open
    self._open_stream()

    # Compare against None explicitly so that timeout=0 is not mistaken for
    # "no timeout"; use the monotonic clock so system clock adjustments
    # cannot shorten or extend the wait.
    deadline = None if timeout is None else time.monotonic() + timeout

    # Loop invariants: one frame's length and its struct format.
    frame_length = self.porcupine.frame_length
    frame_format = "h" * frame_length

    while True:
        if deadline is not None and time.monotonic() >= deadline:
            return False

        raw_frame = self.audio_stream.read(frame_length, exception_on_overflow=False)
        pcm = struct.unpack_from(frame_format, raw_frame)

        if self.porcupine.process(pcm) >= 0:
            print("✅ Wake word обнаружен!")
            # Stop and CLOSE stream to release mic for STT
            self.stop_monitoring()
            return True
|
||||
|
||||
def check_wakeword_once(self) -> bool:
|
||||
@@ -80,20 +97,8 @@ class WakeWordDetector:
|
||||
self.initialize()
|
||||
|
||||
try:
|
||||
# Ensure stream is open/active
|
||||
if self.audio_stream is None or not self.audio_stream.is_active():
|
||||
# Re-open if needed (similar to wait_for_wakeword logic)
|
||||
if self.audio_stream:
|
||||
try:
|
||||
self.audio_stream.close()
|
||||
except: pass
|
||||
self.audio_stream = self.pa.open(
|
||||
rate=self.porcupine.sample_rate,
|
||||
channels=1,
|
||||
format=pyaudio.paInt16,
|
||||
input=True,
|
||||
frames_per_buffer=self.porcupine.frame_length
|
||||
)
|
||||
# Ensure stream is open
|
||||
self._open_stream()
|
||||
|
||||
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
|
||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||
@@ -108,8 +113,7 @@ class WakeWordDetector:
|
||||
|
||||
def cleanup(self):
|
||||
"""Release resources."""
|
||||
if self.audio_stream:
|
||||
self.audio_stream.close()
|
||||
self.stop_monitoring()
|
||||
if self.pa:
|
||||
self.pa.terminate()
|
||||
if self.porcupine:
|
||||
@@ -128,10 +132,14 @@ def get_detector() -> WakeWordDetector:
|
||||
return _detector
|
||||
|
||||
|
||||
def wait_for_wakeword(timeout: float = None) -> bool:
    """Wait for wake word detection using the shared detector.

    Args:
        timeout: Passed unchanged to ``WakeWordDetector.wait_for_wakeword``.

    Returns:
        The detector's result for this wait.
    """
    detector = get_detector()
    return detector.wait_for_wakeword(timeout)
|
||||
|
||||
def stop_monitoring():
    """Stop wake-word monitoring on the shared detector, if one exists."""
    if not _detector:
        return  # no detector has been created: nothing to stop
    _detector.stop_monitoring()
|
||||
|
||||
def cleanup():
|
||||
"""Cleanup detector resources."""
|
||||
|
||||
Reference in New Issue
Block a user