translator но без озвучивания слов на английском
This commit is contained in:
70
tts.py
70
tts.py
@@ -11,7 +11,7 @@ import threading
|
||||
import time
|
||||
import warnings
|
||||
import re
|
||||
from config import TTS_SPEAKER, TTS_SAMPLE_RATE
|
||||
from config import TTS_SPEAKER, TTS_EN_SPEAKER, TTS_SAMPLE_RATE
|
||||
|
||||
# Suppress Silero TTS warning about text length
|
||||
warnings.filterwarnings("ignore", message="Text string is longer than 1000 symbols")
|
||||
@@ -21,27 +21,55 @@ class TextToSpeech:
|
||||
"""Text-to-Speech using Silero TTS with wake word interruption support."""
|
||||
|
||||
def __init__(self):
    """Set up playback-interruption state, voice settings, and empty model slots."""
    # Interruption bookkeeping used while audio is playing
    self._stop_flag = threading.Event()
    self._interrupted = False

    # Audio and voice configuration taken from the config module
    self.sample_rate = TTS_SAMPLE_RATE
    self.speaker = TTS_SPEAKER
    self.speakers = dict(ru=TTS_SPEAKER, en=TTS_EN_SPEAKER)

    # No model is loaded at construction time; `models` maps language -> model
    self.model = None
    self.models = {}
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize Silero TTS model."""
|
||||
print("📦 Загрузка модели Silero TTS v5...")
|
||||
def _load_model(self, language: str):
    """Load and cache the Silero TTS model for the given language.

    Args:
        language: Language code; "ru" and "en" are configured.

    Returns:
        The loaded model. Repeated calls for the same language return the
        instance already stored in ``self.models``.

    Raises:
        ValueError: If ``language`` has no model configuration.
    """
    # Reuse a model that was already loaded for this language
    if language in self.models:
        return self.models[language]

    model_config = {
        "ru": {"language": "ru", "model_id": "v5_ru"},
        "en": {"language": "en", "model_id": "v3_en"},
    }

    if language not in model_config:
        raise ValueError(f"Unsupported TTS language: {language}")

    config = model_config[language]
    print(f"📦 Загрузка модели Silero TTS ({language})...")

    device = torch.device("cpu")
    # The hub entry point returns a pair; only the model is needed here
    model, _ = torch.hub.load(
        repo_or_dir="snakers4/silero-models",
        model="silero_tts",
        language=config["language"],
        speaker=config["model_id"],
    )
    model.to(device)

    self.models[language] = model
    return model
|
||||
|
||||
def _get_speaker(self, language: str, model) -> str:
    """Return a speaker name usable with the loaded model.

    Args:
        language: Language code used to look up the configured speaker in
            ``self.speakers``.
        model: Loaded TTS model. If it exposes a non-empty ``speakers``
            collection, the configured speaker is validated against it.

    Returns:
        The configured speaker when the model lists it or offers nothing to
        validate against; otherwise the first speaker the model lists.
    """
    speaker = self.speakers.get(language)
    available = getattr(model, "speakers", None)

    # Nothing to validate against (attribute missing, None, or empty), or the
    # configured voice is supported: keep it and do not print a warning.
    if not available or speaker in available:
        return speaker

    fallback = available[0]
    print(f"⚠️ Голос '{speaker}' недоступен, использую '{fallback}'")
    return fallback
|
||||
|
||||
def initialize(self):
    """Preload the default (Russian) TTS model via ``_load_model``."""
    default_language = "ru"
    self._load_model(default_language)
|
||||
|
||||
def _split_text(self, text: str, max_length: int = 900) -> list[str]:
|
||||
"""Split text into chunks smaller than max_length."""
|
||||
@@ -83,13 +111,14 @@ class TextToSpeech:
|
||||
# Filter empty chunks
|
||||
return [c for c in chunks if c]
|
||||
|
||||
def speak(self, text: str, check_interrupt=None) -> bool:
|
||||
def speak(self, text: str, check_interrupt=None, language: str = "ru") -> bool:
|
||||
"""
|
||||
Convert text to speech and play it.
|
||||
|
||||
Args:
|
||||
text: Text to synthesize and speak
|
||||
check_interrupt: Optional callback function that returns True if playback should stop
|
||||
language: Language code for voice selection ("ru" or "en")
|
||||
|
||||
Returns:
|
||||
True if playback completed normally, False if interrupted
|
||||
@@ -97,8 +126,8 @@ class TextToSpeech:
|
||||
if not text.strip():
|
||||
return True
|
||||
|
||||
if not self.model:
|
||||
self.initialize()
|
||||
model = self._load_model(language)
|
||||
speaker = self._get_speaker(language, model)
|
||||
|
||||
# Split text into manageable chunks
|
||||
chunks = self._split_text(text)
|
||||
@@ -120,8 +149,8 @@ class TextToSpeech:
|
||||
|
||||
try:
|
||||
# Generate audio for chunk
|
||||
audio = self.model.apply_tts(
|
||||
text=chunk, speaker=self.speaker, sample_rate=self.sample_rate
|
||||
audio = model.apply_tts(
|
||||
text=chunk, speaker=speaker, sample_rate=self.sample_rate
|
||||
)
|
||||
|
||||
# Convert to numpy array
|
||||
@@ -218,18 +247,19 @@ def get_tts() -> TextToSpeech:
|
||||
return _tts
|
||||
|
||||
|
||||
def speak(text: str, check_interrupt=None, language: str = "ru") -> bool:
    """
    Synthesize and speak the given text.

    Delegates to the ``speak`` method of the object returned by ``get_tts()``.

    Args:
        text: Text to speak
        check_interrupt: Optional callback for interrupt checking
        language: Language code for voice selection ("ru" or "en")

    Returns:
        True if completed normally, False if interrupted
    """
    # Forward `language` too; a call without it would always use the default voice
    return get_tts().speak(text, check_interrupt, language)
|
||||
|
||||
|
||||
def was_interrupted() -> bool:
|
||||
|
||||
Reference in New Issue
Block a user