first commit
This commit is contained in:
39
.gitignore
vendored
Normal file
39
.gitignore
vendored
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# Vosk models
|
||||||
|
vosk-model-*/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# VS Code
|
||||||
|
.vscode/
|
||||||
BIN
Alexandr_en_linux_v4_0_0.ppn
Normal file
BIN
Alexandr_en_linux_v4_0_0.ppn
Normal file
Binary file not shown.
BIN
Alexandr_en_linux_v4_0_0/Alexandr_en_linux_v4_0_0.ppn
Normal file
BIN
Alexandr_en_linux_v4_0_0/Alexandr_en_linux_v4_0_0.ppn
Normal file
Binary file not shown.
1
Alexandr_en_linux_v4_0_0/LICENSE.txt
Executable file
1
Alexandr_en_linux_v4_0_0/LICENSE.txt
Executable file
@@ -0,0 +1 @@
|
|||||||
|
A copy of license terms is available at https://picovoice.ai/docs/terms-of-use/
|
||||||
1
LICENSE.txt
Executable file
1
LICENSE.txt
Executable file
@@ -0,0 +1 @@
|
|||||||
|
A copy of license terms is available at https://picovoice.ai/docs/terms-of-use/
|
||||||
67
ai.py
Normal file
67
ai.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
"""
|
||||||
|
AI module for Perplexity API integration.
|
||||||
|
Sends user queries and receives AI responses.
|
||||||
|
"""
|
||||||
|
import requests
|
||||||
|
from config import PERPLEXITY_API_KEY, PERPLEXITY_MODEL, PERPLEXITY_API_URL
|
||||||
|
|
||||||
|
|
||||||
|
# System prompt for the AI (kept in Russian: it is sent to the model verbatim).
SYSTEM_PROMPT = """Ты — голосовой ассистент умной колонки.
Отвечай кратко, по существу, на русском языке.
Избегай длинных списков и сложного форматирования.
Твои ответы будут озвучены голосом, поэтому пиши естественным разговорным языком."""


def ask_ai(user_message: str) -> str:
    """
    Send a message to Perplexity AI and get a response.

    The function never raises for network or response-format problems;
    instead it returns a short Russian phrase that can be spoken to the user.

    Args:
        user_message: User's question or command. ``None`` and blank strings
            are treated as "nothing was heard".

    Returns:
        AI response text, or a spoken-style error message.
    """
    # Guard against None as well as blank input: the caller passes whatever
    # the speech recognizer produced.
    if not user_message or not user_message.strip():
        return "Извините, я не расслышал вашу команду."

    print(f"🤖 Запрос к AI: {user_message}")

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": PERPLEXITY_MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ],
        "max_tokens": 500,
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL,
            headers=headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()

        data = response.json()
        ai_response = data["choices"][0]["message"]["content"]
        # A null/non-text content would otherwise blow up on slicing below
        # or later in the cleaning / TTS stages.
        if not isinstance(ai_response, str):
            raise TypeError(
                f"unexpected content type: {type(ai_response).__name__}"
            )

        print(f"💬 Ответ AI: {ai_response[:100]}...")
        return ai_response

    except requests.exceptions.Timeout:
        return "Извините, сервер не отвечает. Попробуйте позже."
    except requests.exceptions.RequestException as e:
        print(f"❌ Ошибка API: {e}")
        return "Произошла ошибка при обращении к AI. Попробуйте ещё раз."
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # TypeError: payload is not the expected nesting of dicts/lists.
        # ValueError: body is not valid JSON (JSON decoders that do not raise
        # a requests exception).
        print(f"❌ Ошибка парсинга ответа: {e}")
        return "Не удалось обработать ответ от AI."
|
||||||
72
cleaner.py
Normal file
72
cleaner.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"""
|
||||||
|
Response cleaner module.
|
||||||
|
Removes markdown formatting and special characters from AI responses.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
# Ordered (pattern, replacement) rules applied by clean_response().
# The order is significant: a rule must run before any other rule that would
# consume part of its syntax.
_MARKDOWN_RULES = (
    # Fenced code blocks first; otherwise the inline-code rule strips their
    # backticks and the code itself ends up being read aloud.
    (re.compile(r'```[\s\S]*?```'), ''),
    (re.compile(r'`([^`]+)`'), r'\1'),
    # Images before links: an image is a link prefixed with "!", so the link
    # rule would otherwise turn "![alt](url)" into "!alt".
    (re.compile(r'!\[([^\]]*)\]\([^)]+\)'), ''),
    (re.compile(r'\[([^\]]+)\]\([^)]+\)'), r'\1'),
    # Citation references like [1], [citation needed], [source].
    (re.compile(r'\[\d+\]'), ''),
    (re.compile(r'\[citation\s*needed\]', re.IGNORECASE), ''),
    (re.compile(r'\[source\]', re.IGNORECASE), ''),
    # Line-level markup before emphasis, so "***" (horizontal rule) and
    # "* item" (list marker) are not mistaken for italic delimiters.
    # [ \t] is used instead of \s so that a rule never swallows the blank
    # line preceding it.
    (re.compile(r'^[-*_]{3,}[ \t]*$', re.MULTILINE), ''),
    (re.compile(r'^#{1,6}[ \t]*', re.MULTILINE), ''),
    (re.compile(r'^[ \t]*>[ \t]*', re.MULTILINE), ''),
    (re.compile(r'^[ \t]*[-*+][ \t]+', re.MULTILINE), ''),
    (re.compile(r'^[ \t]*\d+\.[ \t]+', re.MULTILINE), ''),
    # Emphasis: bold, then italic, then strikethrough.
    (re.compile(r'\*\*(.+?)\*\*'), r'\1'),
    (re.compile(r'__(.+?)__'), r'\1'),
    (re.compile(r'\*(.+?)\*'), r'\1'),
    (re.compile(r'(?<!\w)_(.+?)_(?!\w)'), r'\1'),
    (re.compile(r'~~(.+?)~~'), r'\1'),
    # HTML tags. A tag must start with a letter (optionally after "/"), so
    # plain comparisons such as "3 < 5 and 7 > 2" are left untouched.
    (re.compile(r'</?[A-Za-z][^<>]*>'), ''),
    # Whitespace normalisation.
    (re.compile(r'\n{3,}'), '\n\n'),
    (re.compile(r' +'), ' '),
)


def clean_response(text: str) -> str:
    """
    Clean AI response from markdown formatting and special characters.

    Code blocks, images, citation markers, horizontal rules and HTML tags are
    dropped entirely; links, inline code and emphasis keep their inner text;
    header, list and blockquote markers are stripped from the line start.

    Args:
        text: Raw AI response with possible markdown. Falsy values
            (``None``, ``""``) yield an empty string.

    Returns:
        Clean text suitable for TTS, with surrounding whitespace removed.
    """
    if not text:
        return ""

    for pattern, replacement in _MARKDOWN_RULES:
        text = pattern.sub(replacement, text)

    return text.strip()
|
||||||
33
config.py
Normal file
33
config.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
"""
|
||||||
|
Configuration module for smart speaker.
|
||||||
|
Loads environment variables from .env file.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Populate the process environment from a .env file before any setting
# below is read.
load_dotenv()

# Directory that contains this module; bundled assets are resolved against it.
BASE_DIR = Path(__file__).parent

# --- Perplexity API -------------------------------------------------------
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_API_KEY = os.environ.get("PERPLEXITY_API_KEY")
PERPLEXITY_MODEL = os.environ.get(
    "PERPLEXITY_MODEL", "llama-3.1-sonar-small-online"
)

# --- Porcupine (wake word) ------------------------------------------------
PORCUPINE_ACCESS_KEY = os.environ.get("PORCUPINE_ACCESS_KEY")
PORCUPINE_KEYWORD_PATH = BASE_DIR.joinpath("Alexandr_en_linux_v4_0_0.ppn")

# --- Vosk (speech-to-text) ------------------------------------------------
VOSK_MODEL_PATH = BASE_DIR.joinpath("vosk-model-ru-0.42")

# --- Audio capture --------------------------------------------------------
SAMPLE_RATE = 16000
CHANNELS = 1

# --- TTS ------------------------------------------------------------------
# Available speakers: aidar, baya, kseniya, xenia, eugene
TTS_SPEAKER = "xenia"
TTS_SAMPLE_RATE = 48000
|
||||||
119
main.py
Normal file
119
main.py
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
"""
|
||||||
|
Smart Speaker - Main Application
|
||||||
|
Голосовой ассистент с wake word detection, STT, AI и TTS.
|
||||||
|
|
||||||
|
Flow:
|
||||||
|
1. Wait for wake word ("Alexandr")
|
||||||
|
2. Listen to user speech (STT)
|
||||||
|
3. Send query to AI (Perplexity)
|
||||||
|
4. Clean response from markdown
|
||||||
|
5. Speak response (TTS)
|
||||||
|
6. Loop back to step 1
|
||||||
|
"""
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from wakeword import wait_for_wakeword, cleanup as cleanup_wakeword, check_wakeword_once
|
||||||
|
from stt import listen, cleanup as cleanup_stt
|
||||||
|
from ai import ask_ai
|
||||||
|
from cleaner import clean_response
|
||||||
|
from tts import speak, initialize as init_tts
|
||||||
|
from sound_level import set_volume, parse_volume_text
|
||||||
|
|
||||||
|
|
||||||
|
def signal_handler(sig, frame):
    """Handle Ctrl+C: release the audio resources and exit with status 0.

    Args:
        sig: Signal number (unused).
        frame: Interrupted stack frame (unused).
    """
    print("\n\n👋 Завершение работы...")
    # Wake-word detector first, then the speech recognizer.
    for release in (cleanup_wakeword, cleanup_stt):
        release()
    sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_volume_command(command: str) -> None:
    """Apply a spoken volume command and announce the outcome.

    Args:
        command: Lower-cased recognised text that starts with "громкость".
    """
    try:
        # Whatever follows the first "громкость" is expected to name the level.
        requested = command.replace("громкость", "", 1).strip()
        level = parse_volume_text(requested)

        if level is None:
            speak("Я не понял число громкости. Скажите число от одного до десяти.")
        elif set_volume(level):
            speak(f"Громкость установлена на {level}")
        else:
            speak("Не удалось установить громкость.")
    except Exception as e:
        print(f"❌ Ошибка громкости: {e}")
        speak("Не удалось изменить громкость.")


def main():
    """Run the assistant loop: wake word -> STT -> AI -> cleanup -> TTS."""
    rule = "=" * 50
    banner = (
        rule,
        "🔊 УМНАЯ КОЛОНКА",
        rule,
        "Скажите 'Alexandr' для активации",
        "Нажмите Ctrl+C для выхода",
        rule,
        "",
    )
    for line in banner:
        print(line)

    # Ctrl+C goes through signal_handler so audio resources get released.
    signal.signal(signal.SIGINT, signal_handler)

    # Load the TTS model up front (takes a few seconds).
    print("⏳ Инициализация...")
    init_tts()
    print()

    # Set when the previous answer was cut short by the wake word: the user
    # has already addressed the assistant, so listening starts immediately.
    skip_wakeword = False

    while True:
        try:
            # Step 1: wait for the wake word unless it was just heard.
            if skip_wakeword:
                skip_wakeword = False
            else:
                wait_for_wakeword()

            # Step 2: capture the user's request.
            user_text = listen(timeout_seconds=7.0)
            if not user_text:
                speak("Извините, я вас не расслышал. Попробуйте ещё раз.")
                continue

            # Volume commands are handled locally and never reach the AI.
            command = user_text.lower()
            if command.startswith("громкость"):
                _handle_volume_command(command)
                continue

            # Steps 3-4: query the AI and strip markdown from its answer.
            clean_text = clean_response(ask_ai(user_text))

            # Step 5: speak the answer; the wake word may interrupt playback.
            completed = speak(clean_text, check_interrupt=check_wakeword_once)
            if not completed:
                # Skip the wake-word wait on the next pass and listen at once.
                print("⏹️ Ответ прерван - слушаю следующий вопрос")
                skip_wakeword = True
                continue

            # Step 6: visual separator, then loop back to step 1.
            print(f"\n{'-' * 30}\n")

        except KeyboardInterrupt:
            signal_handler(None, None)
        except Exception as e:
            print(f"❌ Ошибка: {e}")
            speak("Произошла ошибка. Попробуйте ещё раз.")


if __name__ == "__main__":
    main()
|
||||||
26
requirements.txt
Normal file
26
requirements.txt
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Smart Speaker Dependencies
|
||||||
|
# Python 3.12.8
|
||||||
|
|
||||||
|
# Wake word detection
|
||||||
|
pvporcupine>=3.0.0
|
||||||
|
|
||||||
|
# Speech-to-Text
|
||||||
|
vosk>=0.3.45
|
||||||
|
|
||||||
|
# Audio
|
||||||
|
pyaudio>=0.2.14
|
||||||
|
sounddevice>=0.4.6
|
||||||
|
|
||||||
|
# AI API
|
||||||
|
requests>=2.31.0
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
|
||||||
|
# TTS (Silero)
|
||||||
|
torch>=2.0.0
|
||||||
|
torchaudio>=2.0.0
|
||||||
|
omegaconf>=2.3.0
|
||||||
|
|
||||||
|
# Utils
|
||||||
|
numpy>=1.24.0
|
||||||
70
sound_level.py
Normal file
70
sound_level.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""
|
||||||
|
Volume control module.
|
||||||
|
Regulates system volume on a scale from 1 to 10.
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import re
|
||||||
|
|
||||||
|
NUMBER_MAP = {
|
||||||
|
"один": 1, "раз": 1, "два": 2, "три": 3, "четыре": 4,
|
||||||
|
"пять": 5, "шесть": 6, "семь": 7, "восемь": 8, "девять": 9, "десять": 10
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def set_volume(level: int) -> bool:
|
||||||
|
"""
|
||||||
|
Set system volume (1-10 corresponding to 10%-100%).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
level: Integer between 1 and 10
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if successful, False otherwise
|
||||||
|
"""
|
||||||
|
if not isinstance(level, int):
|
||||||
|
print(f"❌ Ошибка: Уровень громкости должен быть целым числом, получено {type(level)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if level < 1:
|
||||||
|
level = 1
|
||||||
|
elif level > 10:
|
||||||
|
level = 10
|
||||||
|
|
||||||
|
percentage = level * 10
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Set volume using amixer
|
||||||
|
# -q: quiet
|
||||||
|
# sset: set simple control
|
||||||
|
# Master: control name
|
||||||
|
# %: percentage
|
||||||
|
cmd = ["amixer", "-q", "sset", "Master", f"{percentage}%"]
|
||||||
|
subprocess.run(cmd, check=True)
|
||||||
|
print(f"🔊 Громкость установлена на {level} ({percentage}%)")
|
||||||
|
return True
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"❌ Ошибка при установке громкости: {e}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Неизвестная ошибка громкости: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def parse_volume_text(text: str) -> int | None:
|
||||||
|
"""
|
||||||
|
Parse volume level from text (digits or Russian words).
|
||||||
|
Returns integer 1-10 or None if not found.
|
||||||
|
"""
|
||||||
|
text = text.lower()
|
||||||
|
|
||||||
|
# 1. Check for digits
|
||||||
|
num_match = re.search(r'\b(10|[1-9])\b', text)
|
||||||
|
if num_match:
|
||||||
|
return int(num_match.group())
|
||||||
|
|
||||||
|
# 2. Check for words
|
||||||
|
for word, value in NUMBER_MAP.items():
|
||||||
|
if word in text:
|
||||||
|
return value
|
||||||
|
|
||||||
|
return None
|
||||||
122
stt.py
Normal file
122
stt.py
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
"""
|
||||||
|
Speech-to-Text module using Vosk.
|
||||||
|
Recognizes Russian speech from microphone.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import pyaudio
|
||||||
|
from vosk import Model, KaldiRecognizer
|
||||||
|
from config import VOSK_MODEL_PATH, SAMPLE_RATE
|
||||||
|
|
||||||
|
|
||||||
|
class SpeechRecognizer:
    """Speech recognizer using Vosk.

    The microphone stream is opened once but only runs while listen() is
    executing. Keeping it running between calls would let audio captured
    during wake-word waiting and TTS playback pile up in the input buffer
    and be transcribed as part of the next request.
    """

    # Audio frames pulled from the microphone per read.
    CHUNK_SIZE = 4096
    # Consecutive chunks without any recognised speech before listen() gives
    # up early (about 2.5 s at 16 kHz with 4096-frame chunks).
    MAX_SILENT_CHUNKS = 10

    def __init__(self):
        self.model = None
        self.recognizer = None
        self.pa = None
        self.stream = None

    def initialize(self):
        """Initialize Vosk model and audio stream."""
        print("📦 Загрузка модели Vosk...")
        self.model = Model(str(VOSK_MODEL_PATH))
        self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE)
        self.recognizer.SetWords(True)

        self.pa = pyaudio.PyAudio()
        self.stream = self.pa.open(
            rate=SAMPLE_RATE,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=self.CHUNK_SIZE,
            # Capturing starts in listen(), not here.
            start=False,
        )
        print("✅ Модель Vosk загружена")

    def listen(self, timeout_seconds: float = 5.0) -> str:
        """
        Listen to microphone and transcribe speech.

        Returns as soon as the recognizer reports a non-empty final result.
        Listening also ends after MAX_SILENT_CHUNKS consecutive chunks with
        no recognised speech (this applies before the user starts speaking
        too), or when the timeout elapses.

        Args:
            timeout_seconds: Maximum time to listen for speech

        Returns:
            Transcribed text from speech (empty string if nothing was
            recognised)
        """
        if not self.model:
            self.initialize()

        print("🎙️ Слушаю... (говорите)")

        # Fresh recognizer so nothing carries over from the previous request.
        self.recognizer = KaldiRecognizer(self.model, SAMPLE_RATE)

        chunks_to_read = int(SAMPLE_RATE * timeout_seconds / self.CHUNK_SIZE)
        silent_chunks = 0

        if self.stream.is_stopped():
            self.stream.start_stream()
        try:
            for _ in range(chunks_to_read):
                data = self.stream.read(
                    self.CHUNK_SIZE, exception_on_overflow=False
                )

                if self.recognizer.AcceptWaveform(data):
                    result = json.loads(self.recognizer.Result())
                    text = result.get("text", "").strip()
                    if text:
                        print(f"📝 Распознано: {text}")
                        return text
                    silent_chunks += 1
                else:
                    # A non-empty partial result means speech is in progress.
                    partial = json.loads(self.recognizer.PartialResult())
                    if partial.get("partial", ""):
                        silent_chunks = 0
                    else:
                        silent_chunks += 1

                if silent_chunks > self.MAX_SILENT_CHUNKS:
                    break
        finally:
            # Stop capturing so no stale audio is buffered until next call.
            self.stream.stop_stream()

        # Flush whatever the recognizer still holds.
        result = json.loads(self.recognizer.FinalResult())
        text = result.get("text", "").strip()

        if text:
            print(f"📝 Распознано: {text}")
        else:
            print("⚠️ Речь не распознана")

        return text

    def cleanup(self):
        """Release resources. Safe to call more than once."""
        # Detach the handles first so the instance never keeps a reference
        # to a closed stream, even if releasing one of them raises.
        stream, self.stream = self.stream, None
        pa, self.pa = self.pa, None
        self.recognizer = None
        # Forces re-initialisation if the instance is used again.
        self.model = None

        if stream:
            stream.close()
        if pa:
            pa.terminate()
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide recognizer, created on first use by get_recognizer().
_recognizer = None


def get_recognizer() -> SpeechRecognizer:
    """Return the shared SpeechRecognizer, creating it on first call."""
    global _recognizer
    if _recognizer is not None:
        return _recognizer
    _recognizer = SpeechRecognizer()
    return _recognizer


def listen(timeout_seconds: float = 5.0) -> str:
    """Transcribe microphone speech using the shared recognizer.

    Args:
        timeout_seconds: Forwarded to SpeechRecognizer.listen().

    Returns:
        The transcribed text.
    """
    recognizer = get_recognizer()
    return recognizer.listen(timeout_seconds)


def cleanup():
    """Release the shared recognizer's resources and forget the instance."""
    global _recognizer
    if not _recognizer:
        return
    _recognizer.cleanup()
    _recognizer = None
|
||||||
178
tts.py
Normal file
178
tts.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
"""
|
||||||
|
Text-to-Speech module using Silero TTS.
|
||||||
|
Generates natural Russian speech with Xenia voice.
|
||||||
|
Supports interruption via wake word detection using threading.
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
import sounddevice as sd
|
||||||
|
import numpy as np
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from config import TTS_SPEAKER, TTS_SAMPLE_RATE
|
||||||
|
|
||||||
|
|
||||||
|
class TextToSpeech:
    """Text-to-Speech using Silero TTS with wake word interruption support."""

    def __init__(self):
        self.model = None
        self.sample_rate = TTS_SAMPLE_RATE
        self.speaker = TTS_SPEAKER
        # True when the most recent playback was cut short by the callback.
        self._interrupted = False
        # Tells the interrupt-checker thread to exit.
        self._stop_flag = threading.Event()

    def initialize(self):
        """Initialize Silero TTS model."""
        print("📦 Загрузка модели Silero TTS...")

        # torch.hub.load returns a pair here; only the model is kept.
        self.model, _ = torch.hub.load(
            repo_or_dir='snakers4/silero-models',
            model='silero_tts',
            language='ru',
            speaker='v4_ru',
        )

        print(f"✅ Модель TTS загружена (голос: {self.speaker})")

    def speak(self, text: str, check_interrupt=None) -> bool:
        """
        Convert text to speech and play it.

        Args:
            text: Text to synthesize and speak. ``None`` or blank text is a
                no-op that counts as completed.
            check_interrupt: Optional callback function that returns True if
                playback should stop

        Returns:
            True if playback completed normally, False if interrupted.
            False is also returned when synthesis or playback raises; use
            ``was_interrupted`` to tell the two cases apart.
        """
        if not text or not text.strip():
            return True

        if not self.model:
            self.initialize()

        print(f"🔊 Озвучивание: {text[:50]}...")

        self._interrupted = False
        self._stop_flag.clear()

        try:
            # Generate audio
            audio = self.model.apply_tts(
                text=text,
                speaker=self.speaker,
                sample_rate=self.sample_rate,
            )

            # Convert to numpy array for sounddevice
            audio_np = audio.numpy()

            if check_interrupt:
                # Play with interrupt checking in a parallel thread
                return self._play_with_interrupt(audio_np, check_interrupt)

            # Standard blocking playback
            sd.play(audio_np, self.sample_rate)
            sd.wait()
            print("✅ Воспроизведение завершено")
            return True

        except Exception as e:
            print(f"❌ Ошибка TTS: {e}")
            return False

    def _check_interrupt_worker(self, check_interrupt):
        """
        Worker thread that polls the interrupt callback until told to stop.

        When the callback returns True, playback is stopped and the worker
        exits. If the callback raises, the error is reported once and polling
        continues after a short pause, so a persistently failing callback
        cannot spin the CPU.

        Args:
            check_interrupt: Callback that returns True if should interrupt
        """
        error_reported = False
        while not self._stop_flag.is_set():
            try:
                triggered = check_interrupt()
            except Exception as e:
                if not error_reported:
                    print(f"⚠️ Ошибка проверки прерывания: {e}")
                    error_reported = True
                # Returns early if the stop flag is set meanwhile.
                self._stop_flag.wait(0.05)
                continue

            if triggered:
                self._interrupted = True
                sd.stop()
                print("⏹️ Воспроизведение прервано!")
                return

    def _play_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
        """
        Play audio with interrupt checking in parallel thread.

        Args:
            audio_np: Audio data as numpy array
            check_interrupt: Callback that returns True if should interrupt

        Returns:
            True if completed normally, False if interrupted
        """
        # Start interrupt checker thread
        checker_thread = threading.Thread(
            target=self._check_interrupt_worker,
            args=(check_interrupt,),
            daemon=True,
        )
        checker_thread.start()

        try:
            # Play audio (non-blocking start)
            sd.play(audio_np, self.sample_rate)

            # Wait for playback to finish or interrupt
            while sd.get_stream().active:
                if self._interrupted:
                    break
                time.sleep(0.05)
        finally:
            # Signal checker thread to stop; do not wait on it indefinitely.
            self._stop_flag.set()
            checker_thread.join(timeout=0.5)

        if self._interrupted:
            return False

        print("✅ Воспроизведение завершено")
        return True

    @property
    def was_interrupted(self) -> bool:
        """Check if the last playback was interrupted."""
        return self._interrupted
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide TTS engine, created on first use by get_tts().
_tts = None


def get_tts() -> TextToSpeech:
    """Return the shared TextToSpeech instance, creating it on first call."""
    global _tts
    if _tts is not None:
        return _tts
    _tts = TextToSpeech()
    return _tts


def speak(text: str, check_interrupt=None) -> bool:
    """
    Speak the given text through the shared TTS instance.

    Args:
        text: Text to speak
        check_interrupt: Optional callback polled during playback; playback
            stops when it returns True

    Returns:
        The result of TextToSpeech.speak(): True if playback completed
        normally, False otherwise
    """
    engine = get_tts()
    return engine.speak(text, check_interrupt)


def was_interrupted() -> bool:
    """Report whether the last speak() call was interrupted."""
    engine = get_tts()
    return engine.was_interrupted


def initialize():
    """Load the TTS model ahead of the first speak() call."""
    engine = get_tts()
    engine.initialize()
|
||||||
113
wakeword.py
Normal file
113
wakeword.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""
|
||||||
|
Wake word detection module using Porcupine.
|
||||||
|
Listens for the "Alexandr" wake word.
|
||||||
|
"""
|
||||||
|
import pvporcupine
|
||||||
|
import pyaudio
|
||||||
|
import struct
|
||||||
|
from config import PORCUPINE_ACCESS_KEY, PORCUPINE_KEYWORD_PATH
|
||||||
|
|
||||||
|
|
||||||
|
class WakeWordDetector:
    """Detects wake word using Porcupine."""

    def __init__(self):
        self.porcupine = None
        self.audio_stream = None
        self.pa = None

    def initialize(self):
        """Initialize Porcupine and audio stream.

        If any step fails, everything created so far is released before the
        exception propagates, so the detector is never left half-built
        (engine present but no stream to read from).
        """
        try:
            self.porcupine = pvporcupine.create(
                access_key=PORCUPINE_ACCESS_KEY,
                keyword_paths=[str(PORCUPINE_KEYWORD_PATH)],
            )

            self.pa = pyaudio.PyAudio()
            self.audio_stream = self.pa.open(
                rate=self.porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=self.porcupine.frame_length,
            )
        except Exception:
            self.cleanup()
            raise
        print("🎤 Ожидание wake word 'Alexandr'...")

    def _detect_in_next_frame(self) -> bool:
        """Read one audio frame and report whether it contains the wake word."""
        frame_length = self.porcupine.frame_length
        raw = self.audio_stream.read(frame_length, exception_on_overflow=False)
        # One signed 16-bit sample per frame position.
        pcm = struct.unpack_from("h" * frame_length, raw)
        return self.porcupine.process(pcm) >= 0

    def wait_for_wakeword(self) -> bool:
        """
        Blocks until wake word is detected.
        Returns True when wake word is detected.
        """
        if not self.porcupine:
            self.initialize()

        while True:
            if self._detect_in_next_frame():
                print("✅ Wake word обнаружен!")
                return True

    def check_wakeword_once(self) -> bool:
        """
        Check a single audio frame for the wake word.

        The call reads exactly one frame from the microphone, so it returns
        after at most one frame's worth of audio rather than waiting for
        the wake word. Errors while reading or processing the frame are
        deliberately treated as "not detected" (best effort: this is polled
        during playback and must not break it).

        Returns True if wake word detected, False otherwise.
        """
        if not self.porcupine:
            self.initialize()

        try:
            detected = self._detect_in_next_frame()
        except Exception:
            return False

        if detected:
            print("🛑 Wake word обнаружен во время ответа!")
        return detected

    def cleanup(self):
        """Release resources. Safe to call more than once."""
        # Detach the handles first so the instance never keeps a reference
        # to a closed stream or a deleted Porcupine engine.
        stream, self.audio_stream = self.audio_stream, None
        pa, self.pa = self.pa, None
        porcupine, self.porcupine = self.porcupine, None

        if stream:
            stream.close()
        if pa:
            pa.terminate()
        if porcupine:
            porcupine.delete()
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide detector, created on first use by get_detector().
_detector = None


def get_detector() -> WakeWordDetector:
    """Return the shared WakeWordDetector, creating it on first call."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = WakeWordDetector()
    return _detector


def wait_for_wakeword() -> bool:
    """Block on the shared detector until the wake word is heard."""
    detector = get_detector()
    return detector.wait_for_wakeword()


def cleanup():
    """Release the shared detector's resources and forget the instance."""
    global _detector
    if not _detector:
        return
    _detector.cleanup()
    _detector = None


def check_wakeword_once() -> bool:
    """
    Run a single wake-word check on the shared detector.

    Returns True if wake word detected, False otherwise.
    """
    detector = get_detector()
    return detector.check_wakeword_once()
|
||||||
Reference in New Issue
Block a user