feat: harden audio device compatibility across machines
This commit is contained in:
@@ -83,10 +83,12 @@ class SpeechRecognizer:
|
||||
def __init__(self):
    """Create an idle recognizer with no client, audio backend, or stream attached."""
    # Handles that are unset at construction time.
    self.dg_client = self.pa = self.audio_manager = self.stream = None
    # No input device chosen yet; the stream rate starts at the configured SAMPLE_RATE.
    self._input_device_index = None
    self._stream_sample_rate = SAMPLE_RATE
    # Accumulated transcript text starts empty.
    self.transcript = ""
    # Timestamp of the most recent successful operation, seeded with "now".
    self.last_successful_operation = datetime.now()
|
||||
|
||||
def initialize(self):
|
||||
"""Инициализация клиента Deepgram и PyAudio."""
|
||||
@@ -103,9 +105,9 @@ class SpeechRecognizer:
|
||||
print(f"❌ Ошибка при создании клиента Deepgram: {e}")
|
||||
raise
|
||||
|
||||
audio_manager = get_audio_manager()
|
||||
self.pa = audio_manager.get_pyaudio()
|
||||
self._input_device_index = audio_manager.get_input_device_index()
|
||||
self.audio_manager = get_audio_manager()
|
||||
self.pa = self.audio_manager.get_pyaudio()
|
||||
self._input_device_index = self.audio_manager.get_input_device_index()
|
||||
print("✅ Deepgram клиент готов")
|
||||
# Обновляем время последней успешной операции
|
||||
self.last_successful_operation = datetime.now()
|
||||
@@ -131,18 +133,23 @@ class SpeechRecognizer:
|
||||
def _get_stream(self):
|
||||
"""Открывает аудиопоток PyAudio, если он еще не открыт."""
|
||||
if self.stream is None:
|
||||
kwargs = {}
|
||||
if self._input_device_index is not None:
|
||||
kwargs["input_device_index"] = self._input_device_index
|
||||
|
||||
self.stream = self.pa.open(
|
||||
rate=SAMPLE_RATE,
|
||||
channels=1,
|
||||
format=pyaudio.paInt16,
|
||||
input=True,
|
||||
frames_per_buffer=4096,
|
||||
**kwargs,
|
||||
if self.audio_manager is None:
|
||||
self.audio_manager = get_audio_manager()
|
||||
self.stream, self._input_device_index, self._stream_sample_rate = (
|
||||
self.audio_manager.open_input_stream(
|
||||
rate=SAMPLE_RATE,
|
||||
channels=1,
|
||||
format=pyaudio.paInt16,
|
||||
frames_per_buffer=4096,
|
||||
preferred_index=self._input_device_index,
|
||||
fallback_rates=[48000, 44100, 32000, 22050, 16000, 8000],
|
||||
)
|
||||
)
|
||||
if self._stream_sample_rate != SAMPLE_RATE:
|
||||
print(
|
||||
f"⚠️ STT mic stream uses fallback rate={self._stream_sample_rate} "
|
||||
f"(requested {SAMPLE_RATE})"
|
||||
)
|
||||
return self.stream
|
||||
|
||||
async def _process_audio(
|
||||
@@ -242,7 +249,7 @@ class SpeechRecognizer:
|
||||
smart_format=True, # Расстановка знаков препинания
|
||||
encoding="linear16",
|
||||
channels=1,
|
||||
sample_rate=SAMPLE_RATE,
|
||||
sample_rate=self._stream_sample_rate,
|
||||
interim_results=True,
|
||||
utterance_end_ms=int(POST_SPEECH_SILENCE_TIMEOUT_SECONDS * 1000),
|
||||
vad_events=True,
|
||||
|
||||
@@ -14,9 +14,11 @@ import time
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pyaudio
|
||||
import sounddevice as sd
|
||||
import torch
|
||||
|
||||
from ..core.audio_manager import get_audio_manager
|
||||
from ..core.config import TTS_EN_SPEAKER, TTS_SAMPLE_RATE, TTS_SPEAKER
|
||||
|
||||
# Подавляем предупреждения Silero о длинном тексте (мы сами его режем)
|
||||
@@ -36,6 +38,8 @@ class TextToSpeech:
|
||||
self.speaker_en = TTS_EN_SPEAKER
|
||||
self._interrupted = False
|
||||
self._stop_flag = threading.Event()
|
||||
self._audio_manager = None
|
||||
self._output_device_index = None
|
||||
|
||||
def _load_model(self, language: str):
|
||||
"""
|
||||
@@ -232,14 +236,13 @@ class TextToSpeech:
|
||||
audio_np = audio.numpy()
|
||||
|
||||
if check_interrupt:
|
||||
# Воспроизведение с проверкой прерывания (сложная логика)
|
||||
if not self._play_with_interrupt(audio_np, check_interrupt):
|
||||
if not self._play_audio_with_interrupt(audio_np, check_interrupt):
|
||||
success = False
|
||||
break
|
||||
else:
|
||||
# Обычное воспроизведение (блокирующее)
|
||||
sd.play(audio_np, self.sample_rate)
|
||||
sd.wait()
|
||||
if not self._play_audio_blocking(audio_np):
|
||||
success = False
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка TTS (часть {i + 1}/{total_chunks}): {e}")
|
||||
@@ -293,6 +296,75 @@ class TextToSpeech:
|
||||
text, check_interrupt=check_interrupt, language=language
|
||||
)
|
||||
|
||||
def _resample_audio(self, audio_np: np.ndarray, src_rate: int, dst_rate: int):
|
||||
if src_rate == dst_rate:
|
||||
return audio_np.astype(np.float32, copy=False)
|
||||
if audio_np.size == 0:
|
||||
return np.asarray([], dtype=np.float32)
|
||||
|
||||
target_length = max(1, int(round(audio_np.size * dst_rate / src_rate)))
|
||||
x_old = np.arange(audio_np.size, dtype=np.float32)
|
||||
x_new = np.linspace(0.0, float(max(0, audio_np.size - 1)), target_length)
|
||||
resampled = np.interp(x_new, x_old, audio_np.astype(np.float32))
|
||||
return np.asarray(resampled, dtype=np.float32)
|
||||
|
||||
def _play_audio_blocking(self, audio_np: np.ndarray) -> bool:
    """Play ``audio_np`` to completion, preferring sounddevice over PyAudio.

    Returns:
        True when sounddevice finished playback; otherwise whatever the
        PyAudio fallback (``_play_with_pyaudio``) returns.
    """
    try:
        sd.play(audio_np, self.sample_rate)
        sd.wait()
    except Exception as err:
        # Any sounddevice failure is reported and routed to the PyAudio path.
        print(f"⚠️ sounddevice playback failed, fallback to PyAudio: {err}")
        return self._play_with_pyaudio(audio_np, check_interrupt=None)
    return True
|
||||
|
||||
def _play_audio_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
|
||||
try:
|
||||
return self._play_with_interrupt_sounddevice(audio_np, check_interrupt)
|
||||
except Exception as exc:
|
||||
print(
|
||||
"⚠️ sounddevice playback-with-interrupt failed, fallback to PyAudio: "
|
||||
f"{exc}"
|
||||
)
|
||||
return self._play_with_pyaudio(audio_np, check_interrupt=check_interrupt)
|
||||
|
||||
def _play_with_pyaudio(self, audio_np: np.ndarray, check_interrupt=None) -> bool:
    """Play audio through a single-channel float32 PyAudio output stream.

    The stream is requested at ``self.sample_rate`` with a list of fallback
    rates. The audio is resampled to the rate the stream was opened with,
    then written in chunks of about 30 ms (at least 256 samples) so that
    ``check_interrupt`` can be polled between writes.

    Args:
        audio_np: Samples to play.
        check_interrupt: Optional zero-argument callable; a truthy result
            stops playback and sets ``self._interrupted``.

    Returns:
        True when every chunk was written, False on interruption or when
        opening or writing the stream failed (the error is printed).
    """
    if self._audio_manager is None:
        self._audio_manager = get_audio_manager()

    output_stream = None
    try:
        opened = self._audio_manager.open_output_stream(
            rate=self.sample_rate,
            channels=1,
            format=pyaudio.paFloat32,
            preferred_index=self._output_device_index,
            fallback_rates=[48000, 44100, 32000, 22050],
        )
        # Remember the device that worked so the next call tries it first.
        output_stream, self._output_device_index, out_rate = opened

        pcm = self._resample_audio(audio_np, self.sample_rate, out_rate)
        chunk_size = max(256, int(out_rate * 0.03))

        total = len(pcm)
        position = 0
        while position < total:
            if check_interrupt and check_interrupt():
                self._interrupted = True
                return False
            block = pcm[position : position + chunk_size]
            output_stream.write(block.tobytes())
            position += chunk_size
        return True
    except Exception as err:
        print(f"❌ PyAudio playback failed: {err}")
        return False
    finally:
        if output_stream is not None:
            # Best-effort teardown: a failure to stop must not prevent close.
            for release in ("stop_stream", "close"):
                try:
                    getattr(output_stream, release)()
                except Exception:
                    pass
|
||||
|
||||
def _check_interrupt_worker(self, check_interrupt):
|
||||
"""
|
||||
Фоновая функция для потока: постоянно опрашивает check_interrupt.
|
||||
@@ -308,7 +380,9 @@ class TextToSpeech:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _play_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
|
||||
def _play_with_interrupt_sounddevice(
|
||||
self, audio_np: np.ndarray, check_interrupt
|
||||
) -> bool:
|
||||
"""
|
||||
Воспроизводит аудио, параллельно проверяя условие прерывания в отдельном потоке.
|
||||
"""
|
||||
|
||||
@@ -9,6 +9,7 @@ Listens for the "Alexandr" wake word.
|
||||
import pvporcupine
|
||||
import pyaudio
|
||||
import struct
|
||||
import numpy as np
|
||||
from ..core.config import (
|
||||
PORCUPINE_ACCESS_KEY,
|
||||
PORCUPINE_KEYWORD_PATH,
|
||||
@@ -24,6 +25,11 @@ class WakeWordDetector:
|
||||
self.porcupine = None
|
||||
self.audio_stream = None
|
||||
self.pa = None
|
||||
self._audio_manager = None
|
||||
self._input_device_index = None
|
||||
self._capture_sample_rate = None
|
||||
self._capture_frame_length = None
|
||||
self._resampled_pcm_buffer = np.array([], dtype=np.int16)
|
||||
self._stream_closed = True # Флаг состояния потока (закрыт/открыт)
|
||||
self._last_hit_ts = 0.0
|
||||
|
||||
@@ -37,11 +43,13 @@ class WakeWordDetector:
|
||||
)
|
||||
|
||||
# Используем общий экземпляр PyAudio
|
||||
audio_manager = get_audio_manager()
|
||||
self.pa = audio_manager.get_pyaudio()
|
||||
self._input_device_index = audio_manager.get_input_device_index()
|
||||
self._audio_manager = get_audio_manager()
|
||||
self.pa = self._audio_manager.get_pyaudio()
|
||||
self._open_stream()
|
||||
print(f"🎤 Ожидание wake word 'Alexandr' (sens={PORCUPINE_SENSITIVITY:.2f})...")
|
||||
print(
|
||||
"🎤 Ожидание wake word 'Alexandr' "
|
||||
f"(sens={PORCUPINE_SENSITIVITY:.2f}, mic_rate={self._capture_sample_rate})..."
|
||||
)
|
||||
|
||||
def _open_stream(self):
|
||||
"""Открытие аудиопотока с микрофона."""
|
||||
@@ -55,19 +63,28 @@ class WakeWordDetector:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Открываем поток с параметрами, которые требует Porcupine
|
||||
kwargs = {}
|
||||
if getattr(self, "_input_device_index", None) is not None:
|
||||
kwargs["input_device_index"] = self._input_device_index
|
||||
|
||||
self.audio_stream = self.pa.open(
|
||||
rate=self.porcupine.sample_rate,
|
||||
target_rate = int(self.porcupine.sample_rate)
|
||||
fallback_rates = [48000, 44100, 32000, 22050, 16000]
|
||||
self.audio_stream, self._input_device_index, actual_rate = self._audio_manager.open_input_stream(
|
||||
rate=target_rate,
|
||||
channels=1,
|
||||
format=pyaudio.paInt16,
|
||||
input=True,
|
||||
frames_per_buffer=self.porcupine.frame_length,
|
||||
**kwargs,
|
||||
preferred_index=self._input_device_index,
|
||||
fallback_rates=fallback_rates,
|
||||
)
|
||||
self._capture_sample_rate = int(actual_rate)
|
||||
self._capture_frame_length = max(
|
||||
64,
|
||||
int(
|
||||
round(
|
||||
self.porcupine.frame_length
|
||||
* self._capture_sample_rate
|
||||
/ target_rate
|
||||
)
|
||||
),
|
||||
)
|
||||
self._resampled_pcm_buffer = np.array([], dtype=np.int16)
|
||||
self._stream_closed = False
|
||||
|
||||
def stop_monitoring(self):
|
||||
@@ -80,6 +97,40 @@ class WakeWordDetector:
|
||||
pass
|
||||
self._stream_closed = True
|
||||
|
||||
def _resample_to_target_rate(self, pcm: np.ndarray) -> np.ndarray:
|
||||
target_rate = int(self.porcupine.sample_rate)
|
||||
source_rate = int(self._capture_sample_rate or target_rate)
|
||||
if source_rate == target_rate:
|
||||
return pcm
|
||||
if pcm.size == 0:
|
||||
return np.array([], dtype=np.int16)
|
||||
target_length = max(1, int(round(pcm.size * target_rate / source_rate)))
|
||||
x_old = np.arange(pcm.size, dtype=np.float32)
|
||||
x_new = np.linspace(0.0, float(max(0, pcm.size - 1)), target_length)
|
||||
resampled = np.interp(x_new, x_old, pcm.astype(np.float32))
|
||||
return np.asarray(resampled, dtype=np.int16)
|
||||
|
||||
def _read_porcupine_frame(self):
|
||||
target_length = int(self.porcupine.frame_length)
|
||||
if self._capture_sample_rate == self.porcupine.sample_rate:
|
||||
pcm = self.audio_stream.read(target_length, exception_on_overflow=False)
|
||||
return np.asarray(struct.unpack_from("h" * target_length, pcm), dtype=np.int16)
|
||||
|
||||
while self._resampled_pcm_buffer.size < target_length:
|
||||
raw = self.audio_stream.read(
|
||||
self._capture_frame_length, exception_on_overflow=False
|
||||
)
|
||||
captured = np.frombuffer(raw, dtype=np.int16)
|
||||
converted = self._resample_to_target_rate(captured)
|
||||
if converted.size:
|
||||
self._resampled_pcm_buffer = np.concatenate(
|
||||
(self._resampled_pcm_buffer, converted)
|
||||
)
|
||||
|
||||
frame = self._resampled_pcm_buffer[:target_length]
|
||||
self._resampled_pcm_buffer = self._resampled_pcm_buffer[target_length:]
|
||||
return frame
|
||||
|
||||
def wait_for_wakeword(self, timeout: float = None) -> bool:
|
||||
"""
|
||||
Блокирующая функция: ждет, пока не будет услышана фраза "Alexandr"
|
||||
@@ -107,14 +158,10 @@ class WakeWordDetector:
|
||||
return False
|
||||
|
||||
# Читаем небольшой кусочек аудио (frame)
|
||||
pcm = self.audio_stream.read(
|
||||
self.porcupine.frame_length, exception_on_overflow=False
|
||||
)
|
||||
# Конвертируем байты в кортеж чисел (требование Porcupine)
|
||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||
pcm = self._read_porcupine_frame()
|
||||
|
||||
# Обрабатываем фрейм через Porcupine
|
||||
keyword_index = self.porcupine.process(pcm)
|
||||
keyword_index = self.porcupine.process(pcm.tolist())
|
||||
|
||||
# Если keyword_index >= 0, значит ключевое слово обнаружено
|
||||
if keyword_index >= 0:
|
||||
@@ -140,12 +187,9 @@ class WakeWordDetector:
|
||||
try:
|
||||
self._open_stream()
|
||||
|
||||
pcm = self.audio_stream.read(
|
||||
self.porcupine.frame_length, exception_on_overflow=False
|
||||
)
|
||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||
pcm = self._read_porcupine_frame()
|
||||
|
||||
keyword_index = self.porcupine.process(pcm)
|
||||
keyword_index = self.porcupine.process(pcm.tolist())
|
||||
if keyword_index >= 0:
|
||||
now = time.time()
|
||||
if now - self._last_hit_ts < 0.2: # Уменьшаем интервал для более быстрой реакции
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
import pyaudio
|
||||
import threading
|
||||
|
||||
from .config import AUDIO_INPUT_DEVICE_INDEX, AUDIO_INPUT_DEVICE_NAME
|
||||
from .config import (
|
||||
AUDIO_INPUT_DEVICE_INDEX,
|
||||
AUDIO_INPUT_DEVICE_NAME,
|
||||
AUDIO_OUTPUT_DEVICE_INDEX,
|
||||
AUDIO_OUTPUT_DEVICE_NAME,
|
||||
)
|
||||
|
||||
|
||||
class AudioManager:
|
||||
@@ -14,7 +19,9 @@ class AudioManager:
|
||||
cls._instance = super(AudioManager, cls).__new__(cls)
|
||||
cls._instance.pa = pyaudio.PyAudio()
|
||||
cls._instance._input_device_index = None
|
||||
cls._instance._output_device_index = None
|
||||
cls._instance._input_device_resolved = False
|
||||
cls._instance._output_device_resolved = False
|
||||
print("🔊 AudioManager: PyAudio initialized (Global)")
|
||||
return cls._instance
|
||||
|
||||
@@ -33,22 +40,84 @@ class AudioManager:
|
||||
self._input_device_resolved = True
|
||||
return self._input_device_index
|
||||
|
||||
def get_output_device_index(self):
    """Return the cached PortAudio output device index, resolving it on first use.

    The result may be None, in which case PortAudio is left to pick its
    default. Exceptions raised by ``_resolve_output_device_index`` propagate,
    and nothing is cached in that case.
    """
    if not self._output_device_resolved:
        self._output_device_index = self._resolve_output_device_index()
        self._output_device_resolved = True
    return self._output_device_index
|
||||
|
||||
def _get_device_count(self) -> int:
|
||||
if self.pa is None:
|
||||
return 0
|
||||
return int(self.pa.get_device_count() or 0)
|
||||
|
||||
def _is_input_device(self, idx: int) -> bool:
|
||||
try:
|
||||
info = self.pa.get_device_info_by_index(idx)
|
||||
except Exception:
|
||||
return False
|
||||
return int(info.get("maxInputChannels") or 0) > 0
|
||||
|
||||
def _is_output_device(self, idx: int) -> bool:
|
||||
try:
|
||||
info = self.pa.get_device_info_by_index(idx)
|
||||
except Exception:
|
||||
return False
|
||||
return int(info.get("maxOutputChannels") or 0) > 0
|
||||
|
||||
def _find_device_by_name(self, needle: str, input_kind: bool):
|
||||
if not needle:
|
||||
return None
|
||||
lowered = needle.lower()
|
||||
count = self._get_device_count()
|
||||
for idx in range(count):
|
||||
if input_kind and not self._is_input_device(idx):
|
||||
continue
|
||||
if not input_kind and not self._is_output_device(idx):
|
||||
continue
|
||||
try:
|
||||
name = str(self.pa.get_device_info_by_index(idx).get("name") or "")
|
||||
except Exception:
|
||||
continue
|
||||
if lowered in name.lower():
|
||||
return idx
|
||||
return None
|
||||
|
||||
def _get_default_input_index(self):
|
||||
try:
|
||||
info = self.pa.get_default_input_device_info()
|
||||
idx = int(info.get("index"))
|
||||
if self._is_input_device(idx):
|
||||
return idx
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _get_default_output_index(self):
|
||||
try:
|
||||
info = self.pa.get_default_output_device_info()
|
||||
idx = int(info.get("index"))
|
||||
if self._is_output_device(idx):
|
||||
return idx
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _resolve_input_device_index(self):
|
||||
if self.pa is None:
|
||||
return None
|
||||
|
||||
device_count = int(self.pa.get_device_count() or 0)
|
||||
|
||||
def is_input_device(idx: int) -> bool:
|
||||
try:
|
||||
info = self.pa.get_device_info_by_index(idx)
|
||||
except Exception:
|
||||
return False
|
||||
return int(info.get("maxInputChannels") or 0) > 0
|
||||
device_count = self._get_device_count()
|
||||
|
||||
if AUDIO_INPUT_DEVICE_INDEX is not None:
|
||||
idx = int(AUDIO_INPUT_DEVICE_INDEX)
|
||||
if 0 <= idx < device_count and is_input_device(idx):
|
||||
if 0 <= idx < device_count and self._is_input_device(idx):
|
||||
return idx
|
||||
raise RuntimeError(
|
||||
"Audio input initialization failed: invalid AUDIO_INPUT_DEVICE_INDEX="
|
||||
@@ -57,16 +126,9 @@ class AudioManager:
|
||||
)
|
||||
|
||||
if AUDIO_INPUT_DEVICE_NAME:
|
||||
needle = AUDIO_INPUT_DEVICE_NAME.lower()
|
||||
for idx in range(device_count):
|
||||
if not is_input_device(idx):
|
||||
continue
|
||||
try:
|
||||
name = str(self.pa.get_device_info_by_index(idx).get("name") or "")
|
||||
except Exception:
|
||||
continue
|
||||
if needle in name.lower():
|
||||
return idx
|
||||
match_idx = self._find_device_by_name(AUDIO_INPUT_DEVICE_NAME, input_kind=True)
|
||||
if match_idx is not None:
|
||||
return match_idx
|
||||
|
||||
raise RuntimeError(
|
||||
"Audio input initialization failed: could not find an input device "
|
||||
@@ -76,17 +138,13 @@ class AudioManager:
|
||||
)
|
||||
|
||||
# Default input device (if PortAudio has one).
|
||||
try:
|
||||
default_info = self.pa.get_default_input_device_info()
|
||||
default_idx = int(default_info.get("index"))
|
||||
if 0 <= default_idx < device_count and is_input_device(default_idx):
|
||||
return default_idx
|
||||
except Exception:
|
||||
pass
|
||||
default_idx = self._get_default_input_index()
|
||||
if default_idx is not None:
|
||||
return default_idx
|
||||
|
||||
# Fallback: first input device.
|
||||
for idx in range(device_count):
|
||||
if is_input_device(idx):
|
||||
if self._is_input_device(idx):
|
||||
return idx
|
||||
|
||||
raise RuntimeError(
|
||||
@@ -96,12 +154,185 @@ class AudioManager:
|
||||
+ self.describe_input_devices()
|
||||
)
|
||||
|
||||
def _resolve_output_device_index(self):
    """Choose the PortAudio output device index.

    Resolution order: ``AUDIO_OUTPUT_DEVICE_INDEX``, then
    ``AUDIO_OUTPUT_DEVICE_NAME`` (looked up via ``_find_device_by_name``),
    then the PortAudio default output device, then the first device that
    has output channels.

    Returns:
        The chosen device index, or None when PyAudio is not initialized.

    Raises:
        RuntimeError: If the configured index or name matches no output
            device, or if no output device exists at all.
    """
    if self.pa is None:
        return None

    total = self._get_device_count()

    # 1) An explicitly configured index must be valid; it is never skipped silently.
    if AUDIO_OUTPUT_DEVICE_INDEX is not None:
        configured = int(AUDIO_OUTPUT_DEVICE_INDEX)
        if not (0 <= configured < total and self._is_output_device(configured)):
            raise RuntimeError(
                "Audio output initialization failed: invalid AUDIO_OUTPUT_DEVICE_INDEX="
                f"{AUDIO_OUTPUT_DEVICE_INDEX}. Available output devices:\n"
                + self.describe_output_devices()
            )
        return configured

    # 2) A configured name must match some output device.
    if AUDIO_OUTPUT_DEVICE_NAME:
        by_name = self._find_device_by_name(AUDIO_OUTPUT_DEVICE_NAME, input_kind=False)
        if by_name is None:
            raise RuntimeError(
                "Audio output initialization failed: could not find an output device "
                f"matching AUDIO_OUTPUT_DEVICE_NAME={AUDIO_OUTPUT_DEVICE_NAME!r}. "
                "Available output devices:\n"
                + self.describe_output_devices()
            )
        return by_name

    # 3) The PortAudio default output device, when there is one.
    system_default = self._get_default_output_index()
    if system_default is not None:
        return system_default

    # 4) Otherwise the first device that can play audio.
    first_output = next(
        (idx for idx in range(total) if self._is_output_device(idx)), None
    )
    if first_output is not None:
        return first_output

    raise RuntimeError(
        "Audio output initialization failed: no output devices found. "
        "Check speaker connection and PipeWire/PulseAudio. "
        "PortAudio devices:\n"
        + self.describe_output_devices()
    )
|
||||
|
||||
def _ordered_input_candidates(self, preferred_index=None):
|
||||
candidates = []
|
||||
|
||||
def add(idx):
|
||||
if idx not in candidates:
|
||||
candidates.append(idx)
|
||||
|
||||
if preferred_index is not None:
|
||||
add(preferred_index)
|
||||
else:
|
||||
try:
|
||||
add(self.get_input_device_index())
|
||||
except Exception:
|
||||
pass
|
||||
add(self._get_default_input_index())
|
||||
add(None) # Let PortAudio decide default path.
|
||||
for idx in range(self._get_device_count()):
|
||||
if self._is_input_device(idx):
|
||||
add(idx)
|
||||
|
||||
return [idx for idx in candidates if idx is None or self._is_input_device(idx)]
|
||||
|
||||
def _ordered_output_candidates(self, preferred_index=None):
|
||||
candidates = []
|
||||
|
||||
def add(idx):
|
||||
if idx not in candidates:
|
||||
candidates.append(idx)
|
||||
|
||||
if preferred_index is not None:
|
||||
add(preferred_index)
|
||||
else:
|
||||
try:
|
||||
add(self.get_output_device_index())
|
||||
except Exception:
|
||||
pass
|
||||
add(self._get_default_output_index())
|
||||
add(None) # Let PortAudio decide default path.
|
||||
for idx in range(self._get_device_count()):
|
||||
if self._is_output_device(idx):
|
||||
add(idx)
|
||||
|
||||
return [idx for idx in candidates if idx is None or self._is_output_device(idx)]
|
||||
|
||||
def open_input_stream(
    self,
    *,
    rate: int,
    channels: int,
    format,
    frames_per_buffer: int,
    preferred_index=None,
    fallback_rates=None,
):
    """Open an input stream, trying candidate devices and sample rates in order.

    For each device from ``_ordered_input_candidates`` the requested rate is
    tried first, then every positive fallback rate that differs from it.
    ``frames_per_buffer`` is scaled by the rate ratio (minimum 64) so the
    buffer spans roughly the same duration at every rate.

    Returns:
        A ``(stream, device_index, rate)`` tuple for the first combination
        that opens; ``device_index`` is None when PortAudio chose the device.

    Raises:
        RuntimeError: If PyAudio is not initialized or no combination opened.
    """
    if self.pa is None:
        raise RuntimeError("PyAudio is not initialized")

    base_rate = int(rate)
    rates = [base_rate]
    for candidate in fallback_rates or []:
        value = int(candidate)
        if value > 0 and value != base_rate:
            rates.append(value)

    failures = []
    for device_idx in self._ordered_input_candidates(preferred_index):
        for attempt_rate in rates:
            scaled_buffer = int(round(frames_per_buffer * attempt_rate / max(1, base_rate)))
            params = dict(
                rate=attempt_rate,
                channels=channels,
                format=format,
                input=True,
                frames_per_buffer=max(64, scaled_buffer),
            )
            # None means "let PortAudio choose", so no explicit index is passed.
            if device_idx is not None:
                params["input_device_index"] = device_idx
            try:
                return self.pa.open(**params), device_idx, attempt_rate
            except Exception as err:
                failures.append(f"device={device_idx!r}, rate={attempt_rate}: {err}")

    # Only the first dozen failures are reported to keep the message readable.
    joined_errors = "\n".join(failures[:12])
    raise RuntimeError(
        "Audio input initialization failed. Tried multiple devices/rates.\n"
        f"{joined_errors}\nAvailable input devices:\n{self.describe_input_devices()}"
    )
|
||||
|
||||
def open_output_stream(
    self,
    *,
    rate: int,
    channels: int,
    format,
    preferred_index=None,
    fallback_rates=None,
):
    """Open an output stream, trying candidate devices and sample rates in order.

    For each device from ``_ordered_output_candidates`` the requested rate
    is tried first, then every positive fallback rate that differs from it.

    Returns:
        A ``(stream, device_index, rate)`` tuple for the first combination
        that opens; ``device_index`` is None when PortAudio chose the device.

    Raises:
        RuntimeError: If PyAudio is not initialized or no combination opened.
    """
    if self.pa is None:
        raise RuntimeError("PyAudio is not initialized")

    base_rate = int(rate)
    rates = [base_rate]
    for candidate in fallback_rates or []:
        value = int(candidate)
        if value > 0 and value != base_rate:
            rates.append(value)

    failures = []
    for device_idx in self._ordered_output_candidates(preferred_index):
        for attempt_rate in rates:
            params = dict(
                rate=attempt_rate,
                channels=channels,
                format=format,
                output=True,
            )
            # None means "let PortAudio choose", so no explicit index is passed.
            if device_idx is not None:
                params["output_device_index"] = device_idx
            try:
                return self.pa.open(**params), device_idx, attempt_rate
            except Exception as err:
                failures.append(f"device={device_idx!r}, rate={attempt_rate}: {err}")

    # Only the first dozen failures are reported to keep the message readable.
    joined_errors = "\n".join(failures[:12])
    raise RuntimeError(
        "Audio output initialization failed. Tried multiple devices/rates.\n"
        f"{joined_errors}\nAvailable output devices:\n{self.describe_output_devices()}"
    )
|
||||
|
||||
def describe_input_devices(self, limit: int = 20) -> str:
|
||||
if self.pa is None:
|
||||
return "<PyAudio not initialized>"
|
||||
|
||||
items = []
|
||||
count = int(self.pa.get_device_count() or 0)
|
||||
count = self._get_device_count()
|
||||
for idx in range(count):
|
||||
try:
|
||||
info = self.pa.get_device_info_by_index(idx)
|
||||
@@ -116,6 +347,26 @@ class AudioManager:
|
||||
break
|
||||
return "\n".join(items) if items else "<no input devices>"
|
||||
|
||||
def describe_output_devices(self, limit: int = 20) -> str:
    """Return a newline-separated listing of devices that have output channels.

    Each line has the form ``[index] name (out=channels)``. Devices whose
    info cannot be read are skipped, and the listing stops once ``limit``
    entries have been collected.

    Returns:
        The listing, ``"<no output devices>"`` when nothing qualifies, or
        ``"<PyAudio not initialized>"`` when there is no PyAudio instance.
    """
    if self.pa is None:
        return "<PyAudio not initialized>"

    lines = []
    for device_idx in range(self._get_device_count()):
        try:
            info = self.pa.get_device_info_by_index(device_idx)
        except Exception:
            continue
        out_channels = int(info.get("maxOutputChannels") or 0)
        if out_channels > 0:
            label = str(info.get("name") or "").strip()
            lines.append(f"[{device_idx}] {label} (out={out_channels})")
            # The limit is checked after appending, so at least one entry is kept.
            if len(lines) >= limit:
                break

    if not lines:
        return "<no output devices>"
    return "\n".join(lines)
|
||||
|
||||
def cleanup(self):
|
||||
if self.pa:
|
||||
self.pa.terminate()
|
||||
|
||||
@@ -88,6 +88,20 @@ try:
|
||||
except Exception:
|
||||
AUDIO_INPUT_DEVICE_INDEX = None
|
||||
|
||||
# Output device (speaker) selection.
# When neither variable is set, the PortAudio default output device is used (if any).
# Examples:
# - AUDIO_OUTPUT_DEVICE_NAME=pulse
# - AUDIO_OUTPUT_DEVICE_INDEX=5
AUDIO_OUTPUT_DEVICE_NAME = os.getenv("AUDIO_OUTPUT_DEVICE_NAME", "").strip() or None

# A blank or non-numeric index is treated as "not configured".
_audio_out_index_raw = os.getenv("AUDIO_OUTPUT_DEVICE_INDEX", "").strip()
AUDIO_OUTPUT_DEVICE_INDEX = None
if _audio_out_index_raw:
    try:
        AUDIO_OUTPUT_DEVICE_INDEX = int(_audio_out_index_raw)
    except Exception:
        AUDIO_OUTPUT_DEVICE_INDEX = None
|
||||
|
||||
# --- Настройка времени ---
|
||||
# Устанавливаем часовой пояс на Москву, чтобы будильник работал корректно
|
||||
|
||||
|
||||
Reference in New Issue
Block a user