feat: harden audio device compatibility across machines
This commit is contained in:
@@ -83,10 +83,12 @@ class SpeechRecognizer:
|
||||
def __init__(self):
    """Create an idle recognizer without touching any audio device.

    Only placeholders are set here. The client and audio handles are
    assigned later by ``initialize()`` and ``_get_stream()``.
    """
    # Deepgram client; stays None until it is created during initialization.
    self.dg_client = None

    # PyAudio instance and the audio manager it is obtained from
    # (both assigned in initialize()).
    self.pa = None
    self.audio_manager = None

    # Microphone input stream; opened on demand by _get_stream().
    self.stream = None

    # Recognized text, initially empty.
    self.transcript = ""

    # Timestamp of the last successful operation (refreshed by initialize()).
    self.last_successful_operation = datetime.now()

    # Input device index; None until initialize() asks the audio manager.
    self._input_device_index = None

    # Sample rate of the opened stream. Starts at the requested SAMPLE_RATE
    # and is overwritten with the rate reported when the stream is opened.
    self._stream_sample_rate = SAMPLE_RATE
|
||||
|
||||
def initialize(self):
|
||||
"""Инициализация клиента Deepgram и PyAudio."""
|
||||
@@ -103,9 +105,9 @@ class SpeechRecognizer:
|
||||
print(f"❌ Ошибка при создании клиента Deepgram: {e}")
|
||||
raise
|
||||
|
||||
audio_manager = get_audio_manager()
|
||||
self.pa = audio_manager.get_pyaudio()
|
||||
self._input_device_index = audio_manager.get_input_device_index()
|
||||
self.audio_manager = get_audio_manager()
|
||||
self.pa = self.audio_manager.get_pyaudio()
|
||||
self._input_device_index = self.audio_manager.get_input_device_index()
|
||||
print("✅ Deepgram клиент готов")
|
||||
# Обновляем время последней успешной операции
|
||||
self.last_successful_operation = datetime.now()
|
||||
@@ -131,18 +133,23 @@ class SpeechRecognizer:
|
||||
def _get_stream(self):
    """Return the microphone input stream, opening it on first use.

    The stream is opened through the audio manager rather than by calling
    PyAudio directly. ``open_input_stream`` returns a
    ``(stream, device_index, sample_rate)`` triple; all three values are
    stored on the instance so later code can use the rate and device that
    were actually selected.

    Returns:
        The already-open stream if there is one, otherwise the newly
        opened stream.
    """
    # Fast path: reuse the stream that is already open.
    if self.stream is not None:
        return self.stream

    # The manager is normally set by initialize(); fetch it lazily so this
    # method also works if it is called before that.
    if self.audio_manager is None:
        self.audio_manager = get_audio_manager()

    opened = self.audio_manager.open_input_stream(
        rate=SAMPLE_RATE,
        channels=1,
        format=pyaudio.paInt16,
        frames_per_buffer=4096,
        preferred_index=self._input_device_index,
        # Presumably tried when the device rejects SAMPLE_RATE; the exact
        # selection order is decided by the audio manager.
        fallback_rates=[48000, 44100, 32000, 22050, 16000, 8000],
    )
    self.stream, self._input_device_index, self._stream_sample_rate = opened

    # Report once, at open time, when the rate differs from the requested
    # one, because downstream consumers must use the actual stream rate.
    if self._stream_sample_rate != SAMPLE_RATE:
        print(
            f"⚠️ STT mic stream uses fallback rate={self._stream_sample_rate} "
            f"(requested {SAMPLE_RATE})"
        )
    return self.stream
|
||||
|
||||
async def _process_audio(
|
||||
@@ -242,7 +249,7 @@ class SpeechRecognizer:
|
||||
smart_format=True, # Расстановка знаков препинания
|
||||
encoding="linear16",
|
||||
channels=1,
|
||||
sample_rate=SAMPLE_RATE,
|
||||
sample_rate=self._stream_sample_rate,
|
||||
interim_results=True,
|
||||
utterance_end_ms=int(POST_SPEECH_SILENCE_TIMEOUT_SECONDS * 1000),
|
||||
vad_events=True,
|
||||
|
||||
Reference in New Issue
Block a user