translator но без озвучивания слов на английском
This commit is contained in:
BIN
Apex-1.mp3
Normal file
BIN
Apex-1.mp3
Normal file
Binary file not shown.
67
ai.py
67
ai.py
@@ -16,6 +16,10 @@ SYSTEM_PROMPT = """Ты — Александр, умный голосовой а
|
|||||||
Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов.
|
Пиши в разговорном стиле, как при живом общении, но не забывай о вежливости и правильности твоих ответов.
|
||||||
ВАЖНО: Не используй в ответах панибратские или сленговые приветствия и обращения, такие как "Эй", "Хэй", "Слушай" в начале фразы и подобные."""
|
ВАЖНО: Не используй в ответах панибратские или сленговые приветствия и обращения, такие как "Эй", "Хэй", "Слушай" в начале фразы и подобные."""
|
||||||
|
|
||||||
|
TRANSLATION_SYSTEM_PROMPT = """You are a translation engine.
|
||||||
|
Translate from {source} to {target}.
|
||||||
|
Return only the translated text, without quotes, comments, or explanations."""
|
||||||
|
|
||||||
|
|
||||||
def ask_ai(messages_history: list) -> str:
|
def ask_ai(messages_history: list) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -72,3 +76,66 @@ def ask_ai(messages_history: list) -> str:
|
|||||||
except (KeyError, IndexError) as e:
|
except (KeyError, IndexError) as e:
|
||||||
print(f"❌ Ошибка парсинга ответа: {e}")
|
print(f"❌ Ошибка парсинга ответа: {e}")
|
||||||
return "Не удалось обработать ответ от AI."
|
return "Не удалось обработать ответ от AI."
|
||||||
|
|
||||||
|
|
||||||
|
def translate_text(text: str, source_lang: str, target_lang: str) -> str:
    """
    Translate text using Perplexity AI.

    Args:
        text: Text to translate
        source_lang: Source language code ("ru" or "en")
        target_lang: Target language code ("ru" or "en")

    Returns:
        Translated text, or a user-facing Russian error message when the
        input is blank, the request fails, or the response cannot be parsed.
    """
    # Blank or whitespace-only input carries nothing to translate; answer
    # locally instead of spending an API call on it.
    if not text or not text.strip():
        return "Извините, я не расслышал текст для перевода."

    # Unknown codes are passed to the prompt verbatim.
    lang_names = {"ru": "Russian", "en": "English"}
    source_name = lang_names.get(source_lang, source_lang)
    target_name = lang_names.get(target_lang, target_lang)

    print(f"🌍 Перевод: {source_name} -> {target_name}: {text[:60]}...")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    messages = [
        {
            "role": "system",
            "content": TRANSLATION_SYSTEM_PROMPT.format(
                source=source_name, target=target_name
            ),
        },
        {"role": "user", "content": text},
    ]

    payload = {
        "model": PERPLEXITY_MODEL,
        "messages": messages,
        "max_tokens": 400,
        "temperature": 0.2,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()

        data = response.json()
        translated = data["choices"][0]["message"]["content"].strip()

    except requests.exceptions.Timeout:
        return "Извините, сервер не отвечает. Попробуйте позже."
    except requests.exceptions.RequestException as e:
        print(f"❌ Ошибка API перевода: {e}")
        return "Произошла ошибка при переводе. Попробуйте ещё раз."
    except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
        # TypeError/AttributeError: a field has an unexpected type (for
        # example, content is null). ValueError: the body is not valid JSON
        # and the installed requests version reports it as a plain ValueError.
        print(f"❌ Ошибка парсинга ответа перевода: {e}")
        return "Не удалось обработать перевод."

    if not translated:
        # An empty translation would make the assistant stay silent.
        print("❌ Ошибка парсинга ответа перевода: empty content")
        return "Не удалось обработать перевод."
    return translated
|
||||||
|
|||||||
194
alarm.py
Normal file
194
alarm.py
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
"""
|
||||||
|
Alarm clock module.
|
||||||
|
Handles alarm scheduling, persistence, and playback.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import subprocess
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from config import BASE_DIR
|
||||||
|
from local_stt import listen_for_keywords
|
||||||
|
|
||||||
|
ALARM_FILE = BASE_DIR / "alarms.json"
|
||||||
|
ALARM_SOUND = BASE_DIR / "Apex-1.mp3"
|
||||||
|
|
||||||
|
class AlarmClock:
|
||||||
|
def __init__(self):
|
||||||
|
self.alarms = []
|
||||||
|
self.load_alarms()
|
||||||
|
|
||||||
|
def load_alarms(self):
|
||||||
|
"""Load alarms from JSON file."""
|
||||||
|
if ALARM_FILE.exists():
|
||||||
|
try:
|
||||||
|
with open(ALARM_FILE, "r", encoding="utf-8") as f:
|
||||||
|
self.alarms = json.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Ошибка загрузки будильников: {e}")
|
||||||
|
self.alarms = []
|
||||||
|
|
||||||
|
def save_alarms(self):
|
||||||
|
"""Save alarms to JSON file."""
|
||||||
|
try:
|
||||||
|
with open(ALARM_FILE, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(self.alarms, f, indent=4)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Ошибка сохранения будильников: {e}")
|
||||||
|
|
||||||
|
def add_alarm(self, hour: int, minute: int):
|
||||||
|
"""Add a new alarm."""
|
||||||
|
# Check if already exists
|
||||||
|
for alarm in self.alarms:
|
||||||
|
if alarm["hour"] == hour and alarm["minute"] == minute:
|
||||||
|
alarm["active"] = True
|
||||||
|
self.save_alarms()
|
||||||
|
return
|
||||||
|
|
||||||
|
self.alarms.append({
|
||||||
|
"hour": hour,
|
||||||
|
"minute": minute,
|
||||||
|
"active": True
|
||||||
|
})
|
||||||
|
self.save_alarms()
|
||||||
|
print(f"⏰ Будильник установлен на {hour:02d}:{minute:02d}")
|
||||||
|
|
||||||
|
def cancel_all_alarms(self):
|
||||||
|
"""Cancel all active alarms."""
|
||||||
|
for alarm in self.alarms:
|
||||||
|
alarm["active"] = False
|
||||||
|
self.save_alarms()
|
||||||
|
print("🔕 Все будильники отменены.")
|
||||||
|
|
||||||
|
def check_alarms(self):
|
||||||
|
"""Check if any alarm should trigger now. Returns True if triggered."""
|
||||||
|
now = datetime.now()
|
||||||
|
triggered = False
|
||||||
|
|
||||||
|
for alarm in self.alarms:
|
||||||
|
if alarm["active"]:
|
||||||
|
if alarm["hour"] == now.hour and alarm["minute"] == now.minute:
|
||||||
|
# Prevent re-triggering within the same minute?
|
||||||
|
# We should disable it immediately or track last trigger time.
|
||||||
|
# For simple logic: disable it (one-time alarm).
|
||||||
|
|
||||||
|
# But wait, checking every second?
|
||||||
|
# If I disable it, it won't ring for the whole minute.
|
||||||
|
# Correct.
|
||||||
|
print(f"⏰ ВРЕМЯ БУДИЛЬНИКА: {alarm['hour']:02d}:{alarm['minute']:02d}")
|
||||||
|
alarm["active"] = False
|
||||||
|
triggered = True
|
||||||
|
self.trigger_alarm()
|
||||||
|
break # Trigger one at a time
|
||||||
|
|
||||||
|
if triggered:
|
||||||
|
self.save_alarms()
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def trigger_alarm(self):
|
||||||
|
"""Play alarm sound and wait for stop command."""
|
||||||
|
print("🔔 БУДИЛЬНИК ЗВОНИТ! (Скажите 'Стоп' или 'Александр стоп')")
|
||||||
|
|
||||||
|
# Start playing sound in loop
|
||||||
|
# -q for quiet (no output)
|
||||||
|
# --loop -1 for infinite loop
|
||||||
|
cmd = ["mpg123", "-q", "--loop", "-1", str(ALARM_SOUND)]
|
||||||
|
|
||||||
|
try:
|
||||||
|
process = subprocess.Popen(cmd)
|
||||||
|
except FileNotFoundError:
|
||||||
|
print("❌ Ошибка: mpg123 не найден. Установите его: sudo apt install mpg123")
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Listen for stop command using local Vosk
|
||||||
|
# Loop until stop word is heard
|
||||||
|
stop_words = ["стоп", "хватит", "тихо", "замолчи", "отмена", "александр стоп"]
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# Listen in short bursts to be responsive
|
||||||
|
text = listen_for_keywords(stop_words, timeout=3.0)
|
||||||
|
if text:
|
||||||
|
print(f"🛑 Будильник остановлен по команде: '{text}'")
|
||||||
|
break
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Ошибка во время будильника: {e}")
|
||||||
|
finally:
|
||||||
|
# Kill the player
|
||||||
|
process.terminate()
|
||||||
|
try:
|
||||||
|
process.wait(timeout=1)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
process.kill()
|
||||||
|
print("🔕 Будильник выключен.")
|
||||||
|
|
||||||
|
def parse_command(self, text: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Parse user text for alarm commands.
|
||||||
|
Returns response string if command handled, None otherwise.
|
||||||
|
"""
|
||||||
|
text = text.lower()
|
||||||
|
if "будильник" not in text and "разбуди" not in text:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if "отмени" in text:
|
||||||
|
self.cancel_all_alarms()
|
||||||
|
return "Хорошо, я отменил все будильники."
|
||||||
|
|
||||||
|
# Regex to find time: HH:MM, HH-MM, HH MM, HH часов MM минут
|
||||||
|
# 1. "07:30", "7:30"
|
||||||
|
match = re.search(r'\b(\d{1,2})[:.-](\d{2})\b', text)
|
||||||
|
if match:
|
||||||
|
h, m = int(match.group(1)), int(match.group(2))
|
||||||
|
if 0 <= h <= 23 and 0 <= m <= 59:
|
||||||
|
self.add_alarm(h, m)
|
||||||
|
return f"Я установил будильник на {h} часов {m} минут."
|
||||||
|
|
||||||
|
# 2. "7 часов 30 минут" or "7 30"
|
||||||
|
# Search for pattern: digits ... (digits)?
|
||||||
|
# Complex to separate from other numbers.
|
||||||
|
|
||||||
|
# Simple heuristics:
|
||||||
|
words = text.split()
|
||||||
|
nums = [int(s) for s in text.split() if s.isdigit()]
|
||||||
|
|
||||||
|
# "на 7" -> 7:00
|
||||||
|
if "на" in words or "в" in words:
|
||||||
|
# Try to find number after preposition
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Let's rely on explicit digit search if regex failed
|
||||||
|
# Patterns: "на 8", "на 8 30", "на 8 часов 30 минут", "на 8 часов"
|
||||||
|
|
||||||
|
# Regex to capture hour and optional minute
|
||||||
|
# Matches: "на <H> [часов] [M] [минут]"
|
||||||
|
match_time = re.search(r'на\s+(\d{1,2})(?:\s*(?:часов|часа|час))?(?:\s+(\d{1,2})(?:\s*(?:минут|минуты|минута))?)?', text)
|
||||||
|
|
||||||
|
if match_time:
|
||||||
|
h = int(match_time.group(1))
|
||||||
|
m = int(match_time.group(2)) if match_time.group(2) else 0
|
||||||
|
|
||||||
|
# Handle AM/PM if specified
|
||||||
|
if "вечера" in text and h < 12:
|
||||||
|
h += 12
|
||||||
|
elif "утра" in text and h == 12:
|
||||||
|
h = 0
|
||||||
|
|
||||||
|
if 0 <= h <= 23 and 0 <= m <= 59:
|
||||||
|
self.add_alarm(h, m)
|
||||||
|
return f"Хорошо, разбужу вас в {h}:{m:02d}."
|
||||||
|
|
||||||
|
return "Я не понял время для будильника. Пожалуйста, скажите точное время, например 'семь тридцать'."
|
||||||
|
|
||||||
|
# Global instance
|
||||||
|
_alarm_clock = None
|
||||||
|
|
||||||
|
def get_alarm_clock():
    """Return the shared AlarmClock instance, creating it on first use."""
    global _alarm_clock
    if _alarm_clock is not None:
        return _alarm_clock
    _alarm_clock = AlarmClock()
    return _alarm_clock
|
||||||
12
alarms.json
Normal file
12
alarms.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"hour": 10,
|
||||||
|
"minute": 15,
|
||||||
|
"active": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"hour": 3,
|
||||||
|
"minute": 42,
|
||||||
|
"active": false
|
||||||
|
}
|
||||||
|
]
|
||||||
285
cleaner.py
285
cleaner.py
@@ -3,6 +3,7 @@ Response cleaner module.
|
|||||||
Removes markdown formatting and special characters from AI responses.
|
Removes markdown formatting and special characters from AI responses.
|
||||||
Handles complex number-to-text conversion for Russian language.
|
Handles complex number-to-text conversion for Russian language.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import pymorphy3
|
import pymorphy3
|
||||||
from num2words import num2words
|
from num2words import num2words
|
||||||
@@ -12,79 +13,86 @@ morph = pymorphy3.MorphAnalyzer()
|
|||||||
|
|
||||||
# Preposition to case mapping (simplified heuristics)
|
# Preposition to case mapping (simplified heuristics)
|
||||||
PREPOSITION_CASES = {
|
PREPOSITION_CASES = {
|
||||||
'в': 'loct', # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
|
"в": "loct", # Prepositional (Locative 2) or Accusative. 'v godu' -> loct
|
||||||
'во': 'loct',
|
"во": "loct",
|
||||||
'на': 'accs', # Dates: 'na 5 maya' -> Accusative (na pyatoe)
|
"на": "accs", # Dates: 'na 5 maya' -> Accusative (na pyatoe)
|
||||||
'о': 'loct',
|
"о": "loct",
|
||||||
'об': 'loct',
|
"об": "loct",
|
||||||
'обо': 'loct',
|
"обо": "loct",
|
||||||
'при': 'loct',
|
"при": "loct",
|
||||||
'у': 'gent',
|
"у": "gent",
|
||||||
'от': 'gent',
|
"от": "gent",
|
||||||
'до': 'gent',
|
"до": "gent",
|
||||||
'из': 'gent',
|
"из": "gent",
|
||||||
'с': 'gent', # or ablt (instrumental)
|
"с": "gent", # or ablt (instrumental)
|
||||||
'со': 'gent',
|
"со": "gent",
|
||||||
'без': 'gent',
|
"без": "gent",
|
||||||
'для': 'gent',
|
"для": "gent",
|
||||||
'вокруг': 'gent',
|
"вокруг": "gent",
|
||||||
'после': 'gent',
|
"после": "gent",
|
||||||
'к': 'datv',
|
"к": "datv",
|
||||||
'ко': 'datv',
|
"ко": "datv",
|
||||||
'по': 'datv', # or accs for dates (limit). Heuristic: datv defaults usually.
|
"по": "datv", # or accs for dates (limit). Heuristic: datv defaults usually.
|
||||||
'над': 'ablt',
|
"над": "ablt",
|
||||||
'под': 'ablt',
|
"под": "ablt",
|
||||||
'перед': 'ablt',
|
"перед": "ablt",
|
||||||
'за': 'ablt', # or acc
|
"за": "ablt", # or acc
|
||||||
'между': 'ablt',
|
"между": "ablt",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Mapping pymorphy cases to num2words cases
|
# Mapping pymorphy cases to num2words cases
|
||||||
PYMORPHY_TO_NUM2WORDS = {
|
PYMORPHY_TO_NUM2WORDS = {
|
||||||
'nomn': 'nominative',
|
"nomn": "nominative",
|
||||||
'gent': 'genitive',
|
"gent": "genitive",
|
||||||
'datv': 'dative',
|
"datv": "dative",
|
||||||
'accs': 'accusative',
|
"accs": "accusative",
|
||||||
'ablt': 'instrumental',
|
"ablt": "instrumental",
|
||||||
'loct': 'prepositional',
|
"loct": "prepositional",
|
||||||
'voct': 'nominative', # Fallback
|
"voct": "nominative", # Fallback
|
||||||
'gen2': 'genitive',
|
"gen2": "genitive",
|
||||||
'acc2': 'accusative',
|
"acc2": "accusative",
|
||||||
'loc2': 'prepositional',
|
"loc2": "prepositional",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Month names in Genitive case (as they appear in dates)
|
# Month names in Genitive case (as they appear in dates)
|
||||||
MONTHS_GENITIVE = [
|
MONTHS_GENITIVE = [
|
||||||
'января', 'февраля', 'марта', 'апреля', 'мая', 'июня',
|
"января",
|
||||||
'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'
|
"февраля",
|
||||||
|
"марта",
|
||||||
|
"апреля",
|
||||||
|
"мая",
|
||||||
|
"июня",
|
||||||
|
"июля",
|
||||||
|
"августа",
|
||||||
|
"сентября",
|
||||||
|
"октября",
|
||||||
|
"ноября",
|
||||||
|
"декабря",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_case_from_preposition(prep_token):
    """Look up the pymorphy case tag for a preposition.

    Returns None for an empty/None token or a preposition that is not
    listed in PREPOSITION_CASES; the lookup is case-insensitive.
    """
    return PREPOSITION_CASES.get(prep_token.lower()) if prep_token else None
|
||||||
|
|
||||||
def convert_number(number_str, context_type="cardinal", case="nominative", gender="m"):
    """Spell out a numeric string in Russian via num2words.

    A string containing "." or "," is parsed as a float (comma treated as
    the decimal separator), anything else as an int. On any failure the
    error is printed and the input string is returned unchanged.
    """
    try:
        has_fraction = "." in number_str or "," in number_str
        value = (
            float(number_str.replace(",", "."))
            if has_fraction
            else int(number_str)
        )
        spoken = num2words(value, lang="ru", to=context_type, case=case, gender=gender)
    except Exception as e:
        print(f"Error converting number {number_str}: {e}")
        return number_str
    return spoken
|
||||||
|
|
||||||
|
|
||||||
def numbers_to_words(text: str) -> str:
|
def numbers_to_words(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Intelligent conversion of digits in text to Russian words.
|
Intelligent conversion of digits in text to Russian words.
|
||||||
@@ -96,59 +104,65 @@ def numbers_to_words(text: str) -> str:
|
|||||||
# 1. Identify "Year" patterns: "1999 год", "в 2024 году"
|
# 1. Identify "Year" patterns: "1999 год", "в 2024 году"
|
||||||
def replace_year_match(match):
|
def replace_year_match(match):
|
||||||
full_str = match.group(0)
|
full_str = match.group(0)
|
||||||
prep = match.group(1) # Could be None
|
prep = match.group(1) # Could be None
|
||||||
year_str = match.group(2)
|
year_str = match.group(2)
|
||||||
year_word = match.group(3) # год, году, года...
|
year_word = match.group(3) # год, году, года...
|
||||||
|
|
||||||
parsed = morph.parse(year_word)[0]
|
parsed = morph.parse(year_word)[0]
|
||||||
case_tag = parsed.tag.case
|
case_tag = parsed.tag.case
|
||||||
|
|
||||||
if prep and prep.strip().lower() in ['в', 'во'] and case_tag in ['accs', 'nomn']:
|
|
||||||
pass
|
|
||||||
|
|
||||||
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, 'nominative')
|
if (
|
||||||
|
prep
|
||||||
words = convert_number(year_str, context_type='ordinal', case=nw_case, gender='m')
|
and prep.strip().lower() in ["в", "во"]
|
||||||
|
and case_tag in ["accs", "nomn"]
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
nw_case = PYMORPHY_TO_NUM2WORDS.get(case_tag, "nominative")
|
||||||
|
|
||||||
|
words = convert_number(
|
||||||
|
year_str, context_type="ordinal", case=nw_case, gender="m"
|
||||||
|
)
|
||||||
|
|
||||||
prefix = f"{prep} " if prep else ""
|
prefix = f"{prep} " if prep else ""
|
||||||
return f"{prefix}{words} {year_word}"
|
return f"{prefix}{words} {year_word}"
|
||||||
|
|
||||||
text = re.sub(
|
text = re.sub(
|
||||||
r'(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b',
|
r"(?i)\b((?:в|с|к|до|от)\s+)?(\d{3,4})\s+(год[а-я]*)\b",
|
||||||
replace_year_match,
|
replace_year_match,
|
||||||
text
|
text,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2. Identify "Date" patterns: "25 июня", "с 1 мая"
|
# 2. Identify "Date" patterns: "25 июня", "с 1 мая"
|
||||||
# Matches: (Preposition)? (Day) (Month_Genitive)
|
# Matches: (Preposition)? (Day) (Month_Genitive)
|
||||||
# Day is usually 1-31.
|
# Day is usually 1-31.
|
||||||
month_regex = '|'.join(MONTHS_GENITIVE)
|
month_regex = "|".join(MONTHS_GENITIVE)
|
||||||
|
|
||||||
def replace_date_match(match):
|
def replace_date_match(match):
|
||||||
prep = match.group(1)
|
prep = match.group(1)
|
||||||
day_str = match.group(2)
|
day_str = match.group(2)
|
||||||
month_word = match.group(3)
|
month_word = match.group(3)
|
||||||
|
|
||||||
# Determine case
|
# Determine case
|
||||||
# Default to Genitive ("25 июня" -> "двадцать пятого июня")
|
# Default to Genitive ("25 июня" -> "двадцать пятого июня")
|
||||||
case = 'genitive'
|
case = "genitive"
|
||||||
|
|
||||||
if prep:
|
if prep:
|
||||||
prep_clean = prep.strip().lower()
|
prep_clean = prep.strip().lower()
|
||||||
# Specific overrides for dates
|
# Specific overrides for dates
|
||||||
if prep_clean == 'на':
|
if prep_clean == "на":
|
||||||
case = 'accusative' # на 5 мая -> на пятое
|
case = "accusative" # на 5 мая -> на пятое
|
||||||
elif prep_clean == 'по':
|
elif prep_clean == "по":
|
||||||
case = 'accusative' # по 5 мая -> по пятое (limit)
|
case = "accusative" # по 5 мая -> по пятое (limit)
|
||||||
elif prep_clean == 'к':
|
elif prep_clean == "к":
|
||||||
case = 'dative' # к 5 мая -> к пятому
|
case = "dative" # к 5 мая -> к пятому
|
||||||
elif prep_clean in ['с', 'до', 'от']:
|
elif prep_clean in ["с", "до", "от"]:
|
||||||
case = 'genitive' # с 5 мая -> с пятого
|
case = "genitive" # с 5 мая -> с пятого
|
||||||
else:
|
else:
|
||||||
# Fallback to general preposition map
|
# Fallback to general preposition map
|
||||||
morph_case = get_case_from_preposition(prep_clean)
|
morph_case = get_case_from_preposition(prep_clean)
|
||||||
if morph_case:
|
if morph_case:
|
||||||
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'genitive')
|
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "genitive")
|
||||||
|
|
||||||
# Convert to Ordinal
|
# Convert to Ordinal
|
||||||
# Dates are neuter ("число" implies neuter: "пятое", "пятого")
|
# Dates are neuter ("число" implies neuter: "пятое", "пятого")
|
||||||
@@ -156,112 +170,119 @@ def numbers_to_words(text: str) -> str:
|
|||||||
# 5, ordinal, genitive -> "пятого" (masc/neut are same)
|
# 5, ordinal, genitive -> "пятого" (masc/neut are same)
|
||||||
# 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?)
|
# 5, ordinal, accusative -> "пятое" (neuter) vs "пятый" (masc inanimate?)
|
||||||
# Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому).
|
# Let's specify gender='n' (neuter) for dates to be safe (пятое, пятого, пятому).
|
||||||
|
|
||||||
words = convert_number(day_str, context_type='ordinal', case=case, gender='n')
|
words = convert_number(day_str, context_type="ordinal", case=case, gender="n")
|
||||||
|
|
||||||
prefix = f"{prep} " if prep else ""
|
prefix = f"{prep} " if prep else ""
|
||||||
return f"{prefix}{words} {month_word}"
|
return f"{prefix}{words} {month_word}"
|
||||||
|
|
||||||
text = re.sub(
|
text = re.sub(
|
||||||
r'(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(' + month_regex + r')\b',
|
r"(?i)\b((?:с|к|до|от|на|по)\s+)?(\d{1,2})\s+(" + month_regex + r")\b",
|
||||||
replace_date_match,
|
replace_date_match,
|
||||||
text
|
text,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. Handle remaining numbers (Cardinals)
|
# 3. Handle remaining numbers (Cardinals)
|
||||||
def replace_cardinal_match(match):
|
def replace_cardinal_match(match):
|
||||||
prep = match.group(1)
|
prep = match.group(1)
|
||||||
num_str = match.group(2)
|
num_str = match.group(2)
|
||||||
|
|
||||||
case = 'nominative'
|
case = "nominative"
|
||||||
if prep:
|
if prep:
|
||||||
morph_case = get_case_from_preposition(prep.strip())
|
morph_case = get_case_from_preposition(prep.strip())
|
||||||
if morph_case:
|
if morph_case:
|
||||||
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, 'nominative')
|
case = PYMORPHY_TO_NUM2WORDS.get(morph_case, "nominative")
|
||||||
|
|
||||||
words = convert_number(num_str, context_type='cardinal', case=case)
|
words = convert_number(num_str, context_type="cardinal", case=case)
|
||||||
|
|
||||||
prefix = f"{prep} " if prep else ""
|
prefix = f"{prep} " if prep else ""
|
||||||
return f"{prefix}{words}"
|
return f"{prefix}{words}"
|
||||||
|
|
||||||
text = re.sub(
|
text = re.sub(
|
||||||
r'(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b',
|
r"(?i)\b((?:в|на|о|об|обо|при|у|от|до|из|с|со|без|для|вокруг|после|к|ко|по|над|под|перед|за|между)\s+)?(\d+(?:[.,]\d+)?)\b",
|
||||||
replace_cardinal_match,
|
replace_cardinal_match,
|
||||||
text
|
text,
|
||||||
)
|
)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def clean_response(text: str, language: str = "ru") -> str:
    """
    Clean AI response from markdown formatting and special characters.

    Args:
        text: Raw AI response with possible markdown
        language: Target language for output (affects post-processing);
            digits are spelled out only when this is "ru"

    Returns:
        Clean text suitable for TTS
    """
    if not text:
        return ""

    # Remove fenced code blocks ```code``` FIRST: the inline-code rule below
    # would otherwise eat the inner backticks and leave "``code``" behind.
    text = re.sub(r"```[\s\S]*?```", "", text)

    # Remove citation references like [1], [2], [citation], etc.
    # Using hex escapes for brackets to avoid escaping issues
    text = re.sub(r"\x5B\d+\x5D", "", text)
    text = re.sub(r"\x5Bcitation\s*needed\x5D", "", text, flags=re.IGNORECASE)
    text = re.sub(r"\x5Bsource\x5D", "", text, flags=re.IGNORECASE)

    # Remove markdown bold **text** and __text__
    text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
    text = re.sub(r"__(.+?)__", r"\1", text)

    # Remove markdown italic *text* and _text_
    text = re.sub(r"\*(.+?)\*", r"\1", text)
    text = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"\1", text)

    # Remove markdown strikethrough ~~text~~
    text = re.sub(r"~~(.+?)~~", r"\1", text)

    # Remove markdown headers # ## ### etc.
    text = re.sub(r"^#{1,6}\s*", "", text, flags=re.MULTILINE)

    # Remove markdown images ![alt](url) BEFORE links: the link rule also
    # matches the "[alt](url)" tail and would leave a stray "!alt".
    text = re.sub(r"!\x5B([^\x5D]*)\x5D\([^)]+\)", "", text)

    # Remove markdown links [text](url) -> text
    text = re.sub(r"\x5B([^\x5D]+)\x5D\([^)]+\)", r"\1", text)

    # Remove inline code `code`
    text = re.sub(r"`([^`]+)`", r"\1", text)

    # Remove markdown list markers (-, *, +, numbered)
    text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*\d+\.\s+", "", text, flags=re.MULTILINE)

    # Remove blockquotes
    text = re.sub(r"^\s*>\s*", "", text, flags=re.MULTILINE)

    # Remove horizontal rules
    text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)

    # Remove HTML tags if any
    text = re.sub(r"<[^>]+>", "", text)

    # Remove informal slang greetings at the beginning of sentences/responses.
    # The \b is essential: without it "Ну" and "Так" also match as prefixes
    # and mutilate ordinary words ("Нужно" -> "жно", "Такой" -> "ой").
    text = re.sub(
        r"^(?:Эй|Хэй|Слушай|Так|Ну|Короче|В\s+общем)\b[,!?:]?\s*",
        "",
        text,
        flags=re.IGNORECASE | re.MULTILINE,
    )

    # Convert numbers to words only for Russian, and only if digits exist
    if language == "ru" and re.search(r"\d", text):
        text = numbers_to_words(text)

    # Remove extra whitespace
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = re.sub(r" +", " ", text)

    # Clean up and return
    return text.strip()
|
||||||
|
|||||||
10
config.py
10
config.py
@@ -2,6 +2,7 @@
|
|||||||
Configuration module for smart speaker.
|
Configuration module for smart speaker.
|
||||||
Loads environment variables from .env file.
|
Loads environment variables from .env file.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -31,6 +32,13 @@ VOSK_MODEL_PATH = BASE_DIR / "vosk-model-ru-0.42"
|
|||||||
SAMPLE_RATE = 16000
|
SAMPLE_RATE = 16000
|
||||||
CHANNELS = 1
|
CHANNELS = 1
|
||||||
|
|
||||||
|
# Set timezone to Moscow for the process's local-time functions.
import time

os.environ["TZ"] = "Europe/Moscow"
# time.tzset() exists only on Unix; on other platforms skip it rather than
# crash with AttributeError while this module is being imported.
if hasattr(time, "tzset"):
    time.tzset()
|
||||||
|
|
||||||
# TTS configuration
|
# TTS configuration
|
||||||
TTS_SPEAKER = "eugene" # Available: aidar, baya, kseniya, xenia, eugene
|
TTS_SPEAKER = "eugene" # Available (ru): aidar, baya, kseniya, xenia, eugene
|
||||||
|
TTS_EN_SPEAKER = os.getenv("TTS_EN_SPEAKER", "en_0")
|
||||||
TTS_SAMPLE_RATE = 48000
|
TTS_SAMPLE_RATE = 48000
|
||||||
|
|||||||
116
local_stt.py
Normal file
116
local_stt.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
"""
|
||||||
|
Local offline Speech-to-Text module using Vosk.
|
||||||
|
Used for simple command detection (like "stop") without internet.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import pyaudio
|
||||||
|
from vosk import Model, KaldiRecognizer
|
||||||
|
from config import VOSK_MODEL_PATH, SAMPLE_RATE
|
||||||
|
|
||||||
|
class LocalRecognizer:
    """Offline keyword spotter built on a Vosk model and a PyAudio input."""

    def __init__(self):
        # Heavy resources are created lazily by initialize().
        self.model = None
        self.rec = None
        self.pa = None
        self.stream = None

    def _is_ready(self) -> bool:
        """Return True when model, recogniser and audio backend all exist."""
        return (
            self.model is not None
            and self.rec is not None
            and self.pa is not None
        )

    def initialize(self):
        """Load the Vosk model and create the recogniser and audio backend.

        Already-created parts are reused, so the call is safe to repeat
        (for example after cleanup()).

        Returns:
            True on success, False if the model is missing or fails to load.
        """
        if self.model is None:
            if not os.path.exists(VOSK_MODEL_PATH):
                print(f"❌ Ошибка: Vosk модель не найдена по пути {VOSK_MODEL_PATH}")
                return False

            print("📦 Инициализация локального STT (Vosk)...")
            # Redirect stderr (fd 2) to the null device to suppress Vosk logs.
            try:
                sys.stderr.flush()
                saved_stderr = os.dup(2)
                try:
                    null_fd = os.open(os.devnull, os.O_WRONLY)
                    try:
                        os.dup2(null_fd, 2)
                    finally:
                        os.close(null_fd)
                    self.model = Model(str(VOSK_MODEL_PATH))
                finally:
                    # Always restore stderr, even when the model load raises;
                    # otherwise later error output is silently discarded and
                    # the saved descriptor leaks.
                    os.dup2(saved_stderr, 2)
                    os.close(saved_stderr)
            except Exception as e:
                print(f"Error initializing Vosk: {e}")
                return False

        if self.rec is None:
            self.rec = KaldiRecognizer(self.model, SAMPLE_RATE)
        if self.pa is None:
            self.pa = pyaudio.PyAudio()
        return True

    def listen_for_keywords(self, keywords: list, timeout: float = 10.0) -> str:
        """
        Listen for specific keywords locally.

        Args:
            keywords: Phrases to look for (substring match in recognised text)
            timeout: Maximum listening time in seconds

        Returns the recognized text containing a keyword, or empty string.
        """
        import time

        if not self._is_ready() and not self.initialize():
            return ""

        # Open stream
        stream = None
        try:
            stream = self.pa.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=SAMPLE_RATE,
                input=True,
                frames_per_buffer=4096,
            )
            stream.start_stream()
        except Exception as e:
            print(f"❌ Ошибка микрофона: {e}")
            if stream is not None:
                stream.close()
            return ""

        # Drop audio state left over from the previous session; a stale
        # partial result containing a keyword would match again at once.
        self.rec.Reset()

        start_time = time.monotonic()
        print(f"👂 Локальное слушание ожидает: {keywords}")

        detected_text = ""
        try:
            while time.monotonic() - start_time < timeout:
                data = stream.read(4096, exception_on_overflow=False)
                if self.rec.AcceptWaveform(data):
                    # Final result for a finished utterance
                    heard = json.loads(self.rec.Result()).get("text", "")
                    if heard:
                        print(f"📝 Локально: {heard}")
                else:
                    # Partial result: lets a keyword be caught mid-utterance
                    heard = json.loads(self.rec.PartialResult()).get("partial", "")

                if heard and any(kw in heard for kw in keywords):
                    detected_text = heard
                    break
        finally:
            stream.stop_stream()
            stream.close()

        return detected_text

    def cleanup(self):
        """Release the PyAudio backend; it is re-created on the next use."""
        if self.pa:
            self.pa.terminate()
            self.pa = None
|
||||||
|
|
||||||
|
# Module-wide recognizer instance, created on first request
_local_recognizer = None


def get_local_recognizer():
    """Return the shared LocalRecognizer, constructing it on the first call."""
    global _local_recognizer
    if _local_recognizer is not None:
        return _local_recognizer
    _local_recognizer = LocalRecognizer()
    return _local_recognizer
|
||||||
|
|
||||||
|
def listen_for_keywords(keywords: list, timeout: float = 5.0) -> str:
    """Listen for keywords with the shared Vosk recognizer.

    Delegates to ``LocalRecognizer.listen_for_keywords`` on the instance
    returned by ``get_local_recognizer()``; note the default timeout here
    is 5 seconds.
    """
    recognizer = get_local_recognizer()
    return recognizer.listen_for_keywords(keywords, timeout)
|
||||||
173
main.py
173
main.py
@@ -13,14 +13,22 @@ Flow:
|
|||||||
|
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
|
||||||
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
from wakeword import (
|
||||||
|
wait_for_wakeword,
|
||||||
|
cleanup as cleanup_wakeword,
|
||||||
|
check_wakeword_once,
|
||||||
|
stop_monitoring as stop_wakeword_monitoring,
|
||||||
|
)
|
||||||
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
from stt import listen, cleanup as cleanup_stt, get_recognizer
|
||||||
from ai import ask_ai
|
from ai import ask_ai, translate_text
|
||||||
from cleaner import clean_response
|
from cleaner import clean_response
|
||||||
from tts import speak, initialize as init_tts
|
from tts import speak, initialize as init_tts
|
||||||
from sound_level import set_volume, parse_volume_text
|
from sound_level import set_volume, parse_volume_text
|
||||||
|
from alarm import get_alarm_clock
|
||||||
|
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
@@ -31,6 +39,37 @@ def signal_handler(sig, frame):
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
# (command prefix regex, source language, target language)
_TRANSLATION_COMMANDS = (
    (r"переведи\s+на\s+английский", "ru", "en"),
    (r"переведи\s+на\s+русский", "en", "ru"),
    (r"переведи\s+с\s+английского", "en", "ru"),
    (r"переведи\s+с\s+русского", "ru", "en"),
    (r"как\s+по[-\s]?английски", "ru", "en"),
    (r"как\s+по[-\s]?русски", "en", "ru"),
    (r"translate\s+(?:to|into)\s+english", "ru", "en"),
    (r"translate\s+(?:to|into)\s+russian", "en", "ru"),
    (r"translate\s+from\s+english", "en", "ru"),
    (r"translate\s+from\s+russian", "ru", "en"),
)

# Compiled once at import time.
#   \b        - the language word must end here ("englishman" is not a command)
#   [\s:,]*   - optional separator between the command and the payload
#   (.*)      - the payload; DOTALL lets it span several lines
_TRANSLATION_PATTERNS = tuple(
    (re.compile(rf"{prefix}\b[\s:,]*(.*)", re.IGNORECASE | re.DOTALL), source, target)
    for prefix, source, target in _TRANSLATION_COMMANDS
)


def parse_translation_request(text: str):
    """
    Detect translation commands and extract language direction and text.

    The command must be at the start of the text (surrounding whitespace is
    ignored, matching is case-insensitive). Words of the command may be
    separated by any amount of whitespace, and a ":" or "," after the
    command is not included in the extracted text.

    Args:
        text: Recognized user utterance.

    Returns:
        dict with source_lang, target_lang, text (possibly an empty string
        when only the command was spoken), or None if the utterance is not
        a translation command.
    """
    if not text:
        return None

    candidate = text.strip()
    for pattern, source_lang, target_lang in _TRANSLATION_PATTERNS:
        match = pattern.match(candidate)
        if match:
            return {
                "source_lang": source_lang,
                "target_lang": target_lang,
                "text": match.group(1).strip(),
            }
    return None
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main application loop."""
|
"""Main application loop."""
|
||||||
print("=" * 50)
|
print("=" * 50)
|
||||||
@@ -46,8 +85,31 @@ def main():
|
|||||||
|
|
||||||
# Pre-initialize models (takes a few seconds)
|
# Pre-initialize models (takes a few seconds)
|
||||||
print("⏳ Инициализация моделей...")
|
print("⏳ Инициализация моделей...")
|
||||||
get_recognizer().initialize() # Initialize STT model first
|
init_errors = []
|
||||||
init_tts() # Then initialize TTS model
|
|
||||||
|
def init_stt():
|
||||||
|
try:
|
||||||
|
get_recognizer().initialize()
|
||||||
|
except Exception as e:
|
||||||
|
init_errors.append(e)
|
||||||
|
|
||||||
|
def init_tts_model():
|
||||||
|
try:
|
||||||
|
init_tts()
|
||||||
|
except Exception as e:
|
||||||
|
init_errors.append(e)
|
||||||
|
|
||||||
|
stt_thread = threading.Thread(target=init_stt, daemon=True)
|
||||||
|
tts_thread = threading.Thread(target=init_tts_model, daemon=True)
|
||||||
|
stt_thread.start()
|
||||||
|
tts_thread.start()
|
||||||
|
stt_thread.join()
|
||||||
|
tts_thread.join()
|
||||||
|
|
||||||
|
if init_errors:
|
||||||
|
raise init_errors[0]
|
||||||
|
|
||||||
|
alarm_clock = get_alarm_clock() # Initialize Alarm Clock
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# Initialize chat history (last 10 exchanges = 20 messages)
|
# Initialize chat history (last 10 exchanges = 20 messages)
|
||||||
@@ -57,37 +119,58 @@ def main():
|
|||||||
skip_wakeword = False
|
skip_wakeword = False
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
# Ensure wake word detector stream is closed before listening
|
||||||
|
stop_wakeword_monitoring()
|
||||||
|
|
||||||
|
# Check for alarms every loop iteration
|
||||||
|
if alarm_clock.check_alarms():
|
||||||
|
# If alarm triggered and finished (user stopped it), we continue loop
|
||||||
|
# The alarm.trigger_alarm() blocks until stopped.
|
||||||
|
skip_wakeword = False # Reset state after alarm
|
||||||
|
continue
|
||||||
|
|
||||||
# Step 1: Wait for wake word or Follow-up listen
|
# Step 1: Wait for wake word or Follow-up listen
|
||||||
if not skip_wakeword:
|
if not skip_wakeword:
|
||||||
wait_for_wakeword()
|
# Wait with timeout to allow alarm checking
|
||||||
|
detected = wait_for_wakeword(timeout=1.0)
|
||||||
|
|
||||||
|
# If timeout (not detected), loop again to check alarms
|
||||||
|
if not detected:
|
||||||
|
continue
|
||||||
|
|
||||||
# Standard listen after activation
|
# Standard listen after activation
|
||||||
user_text = listen(timeout_seconds=7.0)
|
user_text = listen(timeout_seconds=7.0)
|
||||||
else:
|
else:
|
||||||
# Follow-up listen (wait 2.0s for start, then listen long)
|
# Follow-up listen (wait 5.0s for start)
|
||||||
print("👂 Слушаю продолжение диалога...")
|
print("👂 Слушаю продолжение диалога (5 сек)...")
|
||||||
user_text = listen(timeout_seconds=20.0, detection_timeout=2.0)
|
user_text = listen(timeout_seconds=10.0, detection_timeout=5.0)
|
||||||
|
|
||||||
if not user_text:
|
if not user_text:
|
||||||
# User didn't continue conversation, go back to sleep
|
# User didn't continue conversation, go back to sleep silently
|
||||||
skip_wakeword = False
|
skip_wakeword = False
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Reset flag for now (will be set to True if we speak successfully)
|
|
||||||
skip_wakeword = False
|
|
||||||
|
|
||||||
# Step 2: Check if speech was recognized
|
# Step 2: Check if speech was recognized
|
||||||
if not user_text:
|
if not user_text:
|
||||||
|
# If this was a direct wake word activation but no speech
|
||||||
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
|
||||||
|
skip_wakeword = False # Reset to wake word
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check for stop commands
|
# Check for stop commands
|
||||||
user_text_lower = user_text.lower().strip()
|
user_text_lower = user_text.lower().strip()
|
||||||
if user_text_lower in ["стоп", "александр", "стоп александр"]:
|
if user_text_lower in ["стоп", "александр", "стоп александр", "хватит"]:
|
||||||
print("_" * 50)
|
print("_" * 50)
|
||||||
print("💤 Жду 'Alexandr' для активации...")
|
print("💤 Жду 'Alexandr' для активации...")
|
||||||
skip_wakeword = False
|
skip_wakeword = False
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Check for alarm commands
|
||||||
|
alarm_response = alarm_clock.parse_command(user_text)
|
||||||
|
if alarm_response:
|
||||||
|
speak(alarm_response)
|
||||||
|
continue
|
||||||
|
|
||||||
# Check for volume command
|
# Check for volume command
|
||||||
if user_text.lower().startswith("громкость"):
|
if user_text.lower().startswith("громкость"):
|
||||||
try:
|
try:
|
||||||
@@ -113,21 +196,67 @@ def main():
|
|||||||
speak("Не удалось изменить громкость.")
|
speak("Не удалось изменить громкость.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Check for translation commands
|
||||||
|
translation_request = parse_translation_request(user_text)
|
||||||
|
if translation_request:
|
||||||
|
source_lang = translation_request["source_lang"]
|
||||||
|
target_lang = translation_request["target_lang"]
|
||||||
|
text_to_translate = translation_request["text"]
|
||||||
|
|
||||||
|
if not text_to_translate:
|
||||||
|
prompt = (
|
||||||
|
"Скажи фразу на английском."
|
||||||
|
if source_lang == "en"
|
||||||
|
else "Скажи фразу на русском."
|
||||||
|
)
|
||||||
|
speak(prompt)
|
||||||
|
text_to_translate = listen(
|
||||||
|
timeout_seconds=7.0, detection_timeout=5.0, lang=source_lang
|
||||||
|
)
|
||||||
|
|
||||||
|
if not text_to_translate:
|
||||||
|
speak("Я не расслышал текст для перевода.")
|
||||||
|
skip_wakeword = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
translated_text = translate_text(
|
||||||
|
text_to_translate, source_lang, target_lang
|
||||||
|
)
|
||||||
|
clean_text = clean_response(translated_text, language=target_lang)
|
||||||
|
|
||||||
|
completed = speak(
|
||||||
|
clean_text,
|
||||||
|
check_interrupt=check_wakeword_once,
|
||||||
|
language=target_lang,
|
||||||
|
)
|
||||||
|
stop_wakeword_monitoring()
|
||||||
|
skip_wakeword = True
|
||||||
|
|
||||||
|
if not completed:
|
||||||
|
print("⏹️ Перевод прерван - слушаю следующий вопрос")
|
||||||
|
continue
|
||||||
|
|
||||||
# Step 3: Send to AI
|
# Step 3: Send to AI
|
||||||
# Add user message to history
|
# Add user message to history
|
||||||
chat_history.append({"role": "user", "content": user_text})
|
chat_history.append({"role": "user", "content": user_text})
|
||||||
|
|
||||||
# Get response using history
|
# Get response using history
|
||||||
ai_response = ask_ai(list(chat_history))
|
ai_response = ask_ai(list(chat_history))
|
||||||
|
|
||||||
# Add AI response to history
|
# Add AI response to history
|
||||||
chat_history.append({"role": "assistant", "content": ai_response})
|
chat_history.append({"role": "assistant", "content": ai_response})
|
||||||
|
|
||||||
# Step 4: Clean response
|
# Step 4: Clean response
|
||||||
clean_text = clean_response(ai_response)
|
clean_text = clean_response(ai_response, language="ru")
|
||||||
|
|
||||||
# Step 5: Speak response (with wake word interrupt support)
|
# Step 5: Speak response (with wake word interrupt support)
|
||||||
completed = speak(clean_text, check_interrupt=check_wakeword_once)
|
# This uses check_wakeword_once which opens/closes stream as needed
|
||||||
|
completed = speak(
|
||||||
|
clean_text, check_interrupt=check_wakeword_once, language="ru"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Stop monitoring after TTS finishes (cleanup stream opened by check_wakeword_once)
|
||||||
|
stop_wakeword_monitoring()
|
||||||
|
|
||||||
# Enable follow-up mode for next iteration
|
# Enable follow-up mode for next iteration
|
||||||
skip_wakeword = True
|
skip_wakeword = True
|
||||||
@@ -136,7 +265,12 @@ def main():
|
|||||||
# but we can print a message
|
# but we can print a message
|
||||||
if not completed:
|
if not completed:
|
||||||
print("⏹️ Ответ прерван - слушаю следующий вопрос")
|
print("⏹️ Ответ прерван - слушаю следующий вопрос")
|
||||||
continue
|
# If interrupted, we treat it as immediate follow up?
|
||||||
|
# Usually interruption means "I have a new command"
|
||||||
|
# So skip_wakeword = True is correct.
|
||||||
|
# But we might want to listen IMMEDIATELY without waiting 5s for start?
|
||||||
|
# listen() handles that.
|
||||||
|
pass
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("-" * 30)
|
print("-" * 30)
|
||||||
@@ -149,6 +283,7 @@ def main():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Ошибка: {e}")
|
print(f"❌ Ошибка: {e}")
|
||||||
speak("Произошла ошибка. Попробуйте ещё раз.")
|
speak("Произошла ошибка. Попробуйте ещё раз.")
|
||||||
|
skip_wakeword = False
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
62
stt.py
62
stt.py
@@ -3,6 +3,7 @@ Speech-to-Text module using Deepgram API.
|
|||||||
Recognizes speech from microphone using streaming WebSocket.
|
Recognizes speech from microphone using streaming WebSocket.
|
||||||
Supports Russian (default) and English.
|
Supports Russian (default) and English.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
import threading
|
import threading
|
||||||
@@ -20,6 +21,7 @@ from deepgram import (
|
|||||||
# Configure logging to suppress debug noise
|
# Configure logging to suppress debug noise
|
||||||
logging.getLogger("deepgram").setLevel(logging.WARNING)
|
logging.getLogger("deepgram").setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
class SpeechRecognizer:
|
class SpeechRecognizer:
|
||||||
"""Speech recognizer using Deepgram streaming."""
|
"""Speech recognizer using Deepgram streaming."""
|
||||||
|
|
||||||
@@ -29,18 +31,18 @@ class SpeechRecognizer:
|
|||||||
self.stream = None
|
self.stream = None
|
||||||
self.transcript = ""
|
self.transcript = ""
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
"""Initialize Deepgram client and PyAudio."""
|
"""Initialize Deepgram client and PyAudio."""
|
||||||
if not DEEPGRAM_API_KEY:
|
if not DEEPGRAM_API_KEY:
|
||||||
raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.")
|
raise ValueError("DEEPGRAM_API_KEY is not set in environment or config.")
|
||||||
|
|
||||||
print("📦 Инициализация Deepgram STT...")
|
print("📦 Инициализация Deepgram STT...")
|
||||||
config = DeepgramClientOptions(
|
config = DeepgramClientOptions(
|
||||||
verbose=logging.WARNING,
|
verbose=logging.WARNING,
|
||||||
)
|
)
|
||||||
self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, config)
|
self.dg_client = DeepgramClient(DEEPGRAM_API_KEY, config)
|
||||||
|
|
||||||
self.pa = pyaudio.PyAudio()
|
self.pa = pyaudio.PyAudio()
|
||||||
print("✅ Deepgram клиент готов")
|
print("✅ Deepgram клиент готов")
|
||||||
|
|
||||||
@@ -59,13 +61,14 @@ class SpeechRecognizer:
|
|||||||
async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
|
async def _process_audio(self, dg_connection, timeout_seconds, detection_timeout):
|
||||||
"""Async loop to send audio and wait for results."""
|
"""Async loop to send audio and wait for results."""
|
||||||
self.transcript = ""
|
self.transcript = ""
|
||||||
|
transcript_parts = []
|
||||||
|
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
stream = self._get_stream()
|
stream = self._get_stream()
|
||||||
|
|
||||||
stop_event = asyncio.Event()
|
stop_event = asyncio.Event()
|
||||||
speech_started_event = asyncio.Event()
|
speech_started_event = asyncio.Event()
|
||||||
|
|
||||||
# We need access to the outer 'self' (SpeechRecognizer instance)
|
# We need access to the outer 'self' (SpeechRecognizer instance)
|
||||||
speech_recognizer_self = self
|
speech_recognizer_self = self
|
||||||
|
|
||||||
@@ -74,9 +77,11 @@ class SpeechRecognizer:
|
|||||||
if len(sentence) == 0:
|
if len(sentence) == 0:
|
||||||
return
|
return
|
||||||
if result.is_final:
|
if result.is_final:
|
||||||
print(f"📝 Частичный результат: {sentence}")
|
|
||||||
with speech_recognizer_self.lock:
|
with speech_recognizer_self.lock:
|
||||||
speech_recognizer_self.transcript = sentence
|
transcript_parts.append(sentence)
|
||||||
|
speech_recognizer_self.transcript = " ".join(
|
||||||
|
transcript_parts
|
||||||
|
).strip()
|
||||||
|
|
||||||
def on_speech_started(unused_self, speech_started, **kwargs):
|
def on_speech_started(unused_self, speech_started, **kwargs):
|
||||||
loop.call_soon_threadsafe(speech_started_event.set)
|
loop.call_soon_threadsafe(speech_started_event.set)
|
||||||
@@ -102,7 +107,7 @@ class SpeechRecognizer:
|
|||||||
channels=1,
|
channels=1,
|
||||||
sample_rate=SAMPLE_RATE,
|
sample_rate=SAMPLE_RATE,
|
||||||
interim_results=True,
|
interim_results=True,
|
||||||
utterance_end_ms="1200",
|
utterance_end_ms=1200,
|
||||||
vad_events=True,
|
vad_events=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -133,38 +138,45 @@ class SpeechRecognizer:
|
|||||||
print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}")
|
print(f"\n🛑 Stream stopped. Chunks sent: {chunks_sent}")
|
||||||
|
|
||||||
sender_task = asyncio.create_task(send_audio())
|
sender_task = asyncio.create_task(send_audio())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Wait for speech to start (detection_timeout)
|
# 1. Wait for speech to start (detection_timeout)
|
||||||
if detection_timeout:
|
if detection_timeout:
|
||||||
try:
|
try:
|
||||||
await asyncio.wait_for(speech_started_event.wait(), timeout=detection_timeout)
|
await asyncio.wait_for(
|
||||||
|
speech_started_event.wait(), timeout=detection_timeout
|
||||||
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
# print("Detection timeout - no speech")
|
# print("Detection timeout - no speech")
|
||||||
stop_event.set()
|
stop_event.set()
|
||||||
|
|
||||||
# 2. If started (or no detection timeout), wait for completion
|
# 2. If started (or no detection timeout), wait for completion
|
||||||
if not stop_event.is_set():
|
if not stop_event.is_set():
|
||||||
await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds)
|
await asyncio.wait_for(stop_event.wait(), timeout=timeout_seconds)
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
# print("Global timeout")
|
# print("Global timeout")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
stop_event.set()
|
stop_event.set()
|
||||||
await sender_task
|
await sender_task
|
||||||
# Finish is synchronous
|
# Finish is synchronous
|
||||||
dg_connection.finish()
|
dg_connection.finish()
|
||||||
|
|
||||||
return self.transcript
|
return self.transcript
|
||||||
|
|
||||||
def listen(self, timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
|
def listen(
|
||||||
|
self,
|
||||||
|
timeout_seconds: float = 7.0,
|
||||||
|
detection_timeout: float = None,
|
||||||
|
lang: str = "ru",
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Listen to microphone and transcribe speech.
|
Listen to microphone and transcribe speech.
|
||||||
"""
|
"""
|
||||||
if not self.dg_client:
|
if not self.dg_client:
|
||||||
self.initialize()
|
self.initialize()
|
||||||
|
|
||||||
self.current_lang = lang
|
self.current_lang = lang
|
||||||
print(f"🎙️ Слушаю ({lang})...")
|
print(f"🎙️ Слушаю ({lang})...")
|
||||||
|
|
||||||
@@ -172,16 +184,18 @@ class SpeechRecognizer:
|
|||||||
dg_connection = self.dg_client.listen.live.v("1")
|
dg_connection = self.dg_client.listen.live.v("1")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
transcript = asyncio.run(self._process_audio(dg_connection, timeout_seconds, detection_timeout))
|
transcript = asyncio.run(
|
||||||
|
self._process_audio(dg_connection, timeout_seconds, detection_timeout)
|
||||||
|
)
|
||||||
|
|
||||||
final_text = transcript.strip() if transcript else ""
|
final_text = transcript.strip() if transcript else ""
|
||||||
if final_text:
|
if final_text:
|
||||||
print(f"📝 Распознано: {final_text}")
|
print(f"📝 Распознано: {final_text}")
|
||||||
else:
|
else:
|
||||||
print("⚠️ Речь не распознана")
|
print("⚠️ Речь не распознана")
|
||||||
|
|
||||||
return final_text
|
return final_text
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Ошибка STT: {e}")
|
print(f"❌ Ошибка STT: {e}")
|
||||||
return ""
|
return ""
|
||||||
@@ -208,7 +222,9 @@ def get_recognizer() -> SpeechRecognizer:
|
|||||||
return _recognizer
|
return _recognizer
|
||||||
|
|
||||||
|
|
||||||
def listen(timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru") -> str:
|
def listen(
|
||||||
|
timeout_seconds: float = 7.0, detection_timeout: float = None, lang: str = "ru"
|
||||||
|
) -> str:
|
||||||
"""Listen to microphone and return transcribed text."""
|
"""Listen to microphone and return transcribed text."""
|
||||||
return get_recognizer().listen(timeout_seconds, detection_timeout, lang)
|
return get_recognizer().listen(timeout_seconds, detection_timeout, lang)
|
||||||
|
|
||||||
@@ -218,4 +234,4 @@ def cleanup():
|
|||||||
global _recognizer
|
global _recognizer
|
||||||
if _recognizer:
|
if _recognizer:
|
||||||
_recognizer.cleanup()
|
_recognizer.cleanup()
|
||||||
_recognizer = None
|
_recognizer = None
|
||||||
|
|||||||
70
tts.py
70
tts.py
@@ -11,7 +11,7 @@ import threading
|
|||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
import re
|
import re
|
||||||
from config import TTS_SPEAKER, TTS_SAMPLE_RATE
|
from config import TTS_SPEAKER, TTS_EN_SPEAKER, TTS_SAMPLE_RATE
|
||||||
|
|
||||||
# Suppress Silero TTS warning about text length
|
# Suppress Silero TTS warning about text length
|
||||||
warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols")
|
warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols")
|
||||||
@@ -21,27 +21,55 @@ class TextToSpeech:
|
|||||||
"""Text-to-Speech using Silero TTS with wake word interruption support."""
|
"""Text-to-Speech using Silero TTS with wake word interruption support."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.model = None
|
self.models = {}
|
||||||
self.sample_rate = TTS_SAMPLE_RATE
|
self.sample_rate = TTS_SAMPLE_RATE
|
||||||
self.speaker = TTS_SPEAKER
|
self.speakers = {
|
||||||
|
"ru": TTS_SPEAKER,
|
||||||
|
"en": TTS_EN_SPEAKER,
|
||||||
|
}
|
||||||
self._interrupted = False
|
self._interrupted = False
|
||||||
self._stop_flag = threading.Event()
|
self._stop_flag = threading.Event()
|
||||||
|
|
||||||
def initialize(self):
|
def _load_model(self, language: str):
|
||||||
"""Initialize Silero TTS model."""
|
"""Load and cache Silero TTS model for the given language."""
|
||||||
print("📦 Загрузка модели Silero TTS v5...")
|
if language in self.models:
|
||||||
|
return self.models[language]
|
||||||
|
|
||||||
# Load Silero TTS model
|
model_config = {
|
||||||
device = torch.device('cpu')
|
"ru": {"language": "ru", "model_id": "v5_ru"},
|
||||||
self.model, _ = torch.hub.load(
|
"en": {"language": "en", "model_id": "v3_en"},
|
||||||
|
}
|
||||||
|
|
||||||
|
if language not in model_config:
|
||||||
|
raise ValueError(f"Unsupported TTS language: {language}")
|
||||||
|
|
||||||
|
config = model_config[language]
|
||||||
|
print(f"📦 Загрузка модели Silero TTS ({language})...")
|
||||||
|
|
||||||
|
device = torch.device("cpu")
|
||||||
|
model, _ = torch.hub.load(
|
||||||
repo_or_dir="snakers4/silero-models",
|
repo_or_dir="snakers4/silero-models",
|
||||||
model="silero_tts",
|
model="silero_tts",
|
||||||
language="ru",
|
language=config["language"],
|
||||||
speaker="v5_ru",
|
speaker=config["model_id"],
|
||||||
)
|
)
|
||||||
self.model.to(device)
|
model.to(device)
|
||||||
|
|
||||||
print(f"✅ Модель TTS v5 загружена (голос: {self.speaker})")
|
self.models[language] = model
|
||||||
|
return model
|
||||||
|
|
||||||
|
def _get_speaker(self, language: str, model) -> str:
|
||||||
|
"""Return a valid speaker for the loaded model."""
|
||||||
|
speaker = self.speakers.get(language)
|
||||||
|
if hasattr(model, "speakers") and speaker not in model.speakers:
|
||||||
|
fallback = model.speakers[0] if model.speakers else speaker
|
||||||
|
print(f"⚠️ Голос '{speaker}' недоступен, использую '{fallback}'")
|
||||||
|
return fallback
|
||||||
|
return speaker
|
||||||
|
|
||||||
|
def initialize(self):
    """Pre-load the default TTS model (Russian) through ``_load_model``."""
    default_language = "ru"
    self._load_model(default_language)
|
||||||
|
|
||||||
def _split_text(self, text: str, max_length: int = 900) -> list[str]:
|
def _split_text(self, text: str, max_length: int = 900) -> list[str]:
|
||||||
"""Split text into chunks smaller than max_length."""
|
"""Split text into chunks smaller than max_length."""
|
||||||
@@ -83,13 +111,14 @@ class TextToSpeech:
|
|||||||
# Filter empty chunks
|
# Filter empty chunks
|
||||||
return [c for c in chunks if c]
|
return [c for c in chunks if c]
|
||||||
|
|
||||||
def speak(self, text: str, check_interrupt=None) -> bool:
|
def speak(self, text: str, check_interrupt=None, language: str = "ru") -> bool:
|
||||||
"""
|
"""
|
||||||
Convert text to speech and play it.
|
Convert text to speech and play it.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Text to synthesize and speak
|
text: Text to synthesize and speak
|
||||||
check_interrupt: Optional callback function that returns True if playback should stop
|
check_interrupt: Optional callback function that returns True if playback should stop
|
||||||
|
language: Language code for voice selection ("ru" or "en")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if playback completed normally, False if interrupted
|
True if playback completed normally, False if interrupted
|
||||||
@@ -97,8 +126,8 @@ class TextToSpeech:
|
|||||||
if not text.strip():
|
if not text.strip():
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if not self.model:
|
model = self._load_model(language)
|
||||||
self.initialize()
|
speaker = self._get_speaker(language, model)
|
||||||
|
|
||||||
# Split text into manageable chunks
|
# Split text into manageable chunks
|
||||||
chunks = self._split_text(text)
|
chunks = self._split_text(text)
|
||||||
@@ -120,8 +149,8 @@ class TextToSpeech:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Generate audio for chunk
|
# Generate audio for chunk
|
||||||
audio = self.model.apply_tts(
|
audio = model.apply_tts(
|
||||||
text=chunk, speaker=self.speaker, sample_rate=self.sample_rate
|
text=chunk, speaker=speaker, sample_rate=self.sample_rate
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert to numpy array
|
# Convert to numpy array
|
||||||
@@ -218,18 +247,19 @@ def get_tts() -> TextToSpeech:
|
|||||||
return _tts
|
return _tts
|
||||||
|
|
||||||
|
|
||||||
def speak(text: str, check_interrupt=None) -> bool:
|
def speak(text: str, check_interrupt=None, language: str = "ru") -> bool:
|
||||||
"""
|
"""
|
||||||
Synthesize and speak the given text.
|
Synthesize and speak the given text.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Text to speak
|
text: Text to speak
|
||||||
check_interrupt: Optional callback for interrupt checking
|
check_interrupt: Optional callback for interrupt checking
|
||||||
|
language: Language code for voice selection ("ru" or "en")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if completed normally, False if interrupted
|
True if completed normally, False if interrupted
|
||||||
"""
|
"""
|
||||||
return get_tts().speak(text, check_interrupt)
|
return get_tts().speak(text, check_interrupt, language)
|
||||||
|
|
||||||
|
|
||||||
def was_interrupted() -> bool:
|
def was_interrupted() -> bool:
|
||||||
|
|||||||
94
wakeword.py
94
wakeword.py
@@ -15,6 +15,7 @@ class WakeWordDetector:
|
|||||||
self.porcupine = None
|
self.porcupine = None
|
||||||
self.audio_stream = None
|
self.audio_stream = None
|
||||||
self.pa = None
|
self.pa = None
|
||||||
|
self._stream_closed = True # Track state explicitly
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
"""Initialize Porcupine and audio stream."""
|
"""Initialize Porcupine and audio stream."""
|
||||||
@@ -24,6 +25,19 @@ class WakeWordDetector:
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.pa = pyaudio.PyAudio()
|
self.pa = pyaudio.PyAudio()
|
||||||
|
self._open_stream()
|
||||||
|
print("🎤 Ожидание wake word 'Alexandr'...")
|
||||||
|
|
||||||
|
def _open_stream(self):
|
||||||
|
"""Open the audio stream."""
|
||||||
|
if self.audio_stream and not self._stream_closed:
|
||||||
|
return
|
||||||
|
|
||||||
|
if self.audio_stream:
|
||||||
|
try:
|
||||||
|
self.audio_stream.close()
|
||||||
|
except: pass
|
||||||
|
|
||||||
self.audio_stream = self.pa.open(
|
self.audio_stream = self.pa.open(
|
||||||
rate=self.porcupine.sample_rate,
|
rate=self.porcupine.sample_rate,
|
||||||
channels=1,
|
channels=1,
|
||||||
@@ -31,44 +45,47 @@ class WakeWordDetector:
|
|||||||
input=True,
|
input=True,
|
||||||
frames_per_buffer=self.porcupine.frame_length
|
frames_per_buffer=self.porcupine.frame_length
|
||||||
)
|
)
|
||||||
print("🎤 Ожидание wake word 'Alexandr'...")
|
self._stream_closed = False
|
||||||
|
|
||||||
def wait_for_wakeword(self) -> bool:
|
def stop_monitoring(self):
    """Explicitly stop and close the stream.

    Does nothing when there is no stream or it is already marked closed.
    Stopping and closing are attempted independently, so a failure while
    stopping does not prevent the stream from being closed. Errors from
    either step are ignored (best effort), but KeyboardInterrupt and
    SystemExit are no longer swallowed.
    """
    stream = self.audio_stream
    if stream and not self._stream_closed:
        try:
            stream.stop_stream()
        except Exception:
            # Best effort: the stream may already be stopped or broken.
            pass
        try:
            stream.close()
        except Exception:
            # Best effort: nothing more can be done with a stream that fails to close.
            pass
        self._stream_closed = True
|
||||||
|
|
||||||
|
def wait_for_wakeword(self, timeout: float = None) -> bool:
|
||||||
"""
|
"""
|
||||||
Blocks until wake word is detected.
|
Blocks until wake word is detected or timeout expires.
|
||||||
Returns True when wake word is detected.
|
|
||||||
|
Args:
|
||||||
|
timeout: Maximum seconds to wait. None = infinite.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if wake word detected, False if timeout.
|
||||||
"""
|
"""
|
||||||
|
import time
|
||||||
if not self.porcupine:
|
if not self.porcupine:
|
||||||
self.initialize()
|
self.initialize()
|
||||||
|
|
||||||
# Ensure stream is open and active
|
# Ensure stream is open
|
||||||
if self.audio_stream is None or not self.audio_stream.is_active():
|
self._open_stream()
|
||||||
# If closed or None, we might need to recreate it.
|
|
||||||
# PyAudio streams once closed cannot be reopened usually?
|
start_time = time.time()
|
||||||
# We should probably recreate it.
|
|
||||||
if self.audio_stream:
|
|
||||||
try:
|
|
||||||
self.audio_stream.close()
|
|
||||||
except: pass
|
|
||||||
|
|
||||||
self.audio_stream = self.pa.open(
|
|
||||||
rate=self.porcupine.sample_rate,
|
|
||||||
channels=1,
|
|
||||||
format=pyaudio.paInt16,
|
|
||||||
input=True,
|
|
||||||
frames_per_buffer=self.porcupine.frame_length
|
|
||||||
)
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
if timeout and (time.time() - start_time > timeout):
|
||||||
|
return False
|
||||||
|
|
||||||
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
|
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
|
||||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||||
|
|
||||||
keyword_index = self.porcupine.process(pcm)
|
keyword_index = self.porcupine.process(pcm)
|
||||||
if keyword_index >= 0:
|
if keyword_index >= 0:
|
||||||
print("✅ Wake word обнаружен!")
|
print("✅ Wake word обнаружен!")
|
||||||
# Stop and CLOSE stream to release mic for STT
|
self.stop_monitoring()
|
||||||
self.audio_stream.stop_stream()
|
|
||||||
self.audio_stream.close()
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def check_wakeword_once(self) -> bool:
|
def check_wakeword_once(self) -> bool:
|
||||||
@@ -80,20 +97,8 @@ class WakeWordDetector:
|
|||||||
self.initialize()
|
self.initialize()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Ensure stream is open/active
|
# Ensure stream is open
|
||||||
if self.audio_stream is None or not self.audio_stream.is_active():
|
self._open_stream()
|
||||||
# Re-open if needed (similar to wait_for_wakeword logic)
|
|
||||||
if self.audio_stream:
|
|
||||||
try:
|
|
||||||
self.audio_stream.close()
|
|
||||||
except: pass
|
|
||||||
self.audio_stream = self.pa.open(
|
|
||||||
rate=self.porcupine.sample_rate,
|
|
||||||
channels=1,
|
|
||||||
format=pyaudio.paInt16,
|
|
||||||
input=True,
|
|
||||||
frames_per_buffer=self.porcupine.frame_length
|
|
||||||
)
|
|
||||||
|
|
||||||
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
|
pcm = self.audio_stream.read(self.porcupine.frame_length, exception_on_overflow=False)
|
||||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||||
@@ -108,8 +113,7 @@ class WakeWordDetector:
|
|||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
"""Release resources."""
|
"""Release resources."""
|
||||||
if self.audio_stream:
|
self.stop_monitoring()
|
||||||
self.audio_stream.close()
|
|
||||||
if self.pa:
|
if self.pa:
|
||||||
self.pa.terminate()
|
self.pa.terminate()
|
||||||
if self.porcupine:
|
if self.porcupine:
|
||||||
@@ -128,10 +132,14 @@ def get_detector() -> WakeWordDetector:
|
|||||||
return _detector
|
return _detector
|
||||||
|
|
||||||
|
|
||||||
def wait_for_wakeword() -> bool:
|
def wait_for_wakeword(timeout: float = None) -> bool:
|
||||||
"""Wait for wake word detection."""
|
"""Wait for wake word detection."""
|
||||||
return get_detector().wait_for_wakeword()
|
return get_detector().wait_for_wakeword(timeout)
|
||||||
|
|
||||||
|
def stop_monitoring():
|
||||||
|
"""Stop monitoring for wake word."""
|
||||||
|
if _detector:
|
||||||
|
_detector.stop_monitoring()
|
||||||
|
|
||||||
def cleanup():
|
def cleanup():
|
||||||
"""Cleanup detector resources."""
|
"""Cleanup detector resources."""
|
||||||
|
|||||||
Reference in New Issue
Block a user