From 6c2702d5e340f5500cfe1e7b107f8e85d54a0ff5 Mon Sep 17 00:00:00 2001 From: future Date: Thu, 12 Mar 2026 14:08:20 +0300 Subject: [PATCH] feat: harden audio device compatibility across machines --- .env.example | 5 + README.md | 11 ++ app/audio/stt.py | 37 +++-- app/audio/tts.py | 86 ++++++++++- app/audio/wakeword.py | 92 +++++++++--- app/core/audio_manager.py | 309 ++++++++++++++++++++++++++++++++++---- app/core/config.py | 14 ++ 7 files changed, 480 insertions(+), 74 deletions(-) diff --git a/.env.example b/.env.example index 95251de..a4a03dd 100644 --- a/.env.example +++ b/.env.example @@ -30,6 +30,11 @@ ANTHROPIC_API_VERSION=2023-06-01 DEEPGRAM_API_KEY=your_deepgram_api_key_here PORCUPINE_ACCESS_KEY=your_porcupine_access_key_here PORCUPINE_SENSITIVITY=0.8 +# Optional audio device overrides (substring match by name or exact PortAudio index) +# AUDIO_INPUT_DEVICE_NAME=pulse +# AUDIO_INPUT_DEVICE_INDEX=2 +# AUDIO_OUTPUT_DEVICE_NAME=pulse +# AUDIO_OUTPUT_DEVICE_INDEX=5 TTS_EN_SPEAKER=en_0 WEATHER_LAT=63.56 WEATHER_LON=53.69 diff --git a/README.md b/README.md index 61d574d..0b687b5 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,12 @@ python run.py После запуска ассистент перейдет в режим ожидания фразы `Alexandr`. +### Кроссплатформенный аудиорежим + +- Приложение автоматически подбирает рабочий микрофон/динамик через PortAudio. +- Если основное аудиоустройство не подходит, включается fallback по другим устройствам и sample rate. +- При проблемах можно явно задать устройство через `.env` (`AUDIO_*_DEVICE_NAME` или `AUDIO_*_DEVICE_INDEX`). 
+ ## Конфигурация `.env` | Переменная | Обязательно | По умолчанию | Назначение | @@ -142,6 +148,10 @@ python run.py | `DEEPGRAM_API_KEY` | Да | - | Ключ Deepgram STT | | `PORCUPINE_ACCESS_KEY` | Да | - | Ключ PicoVoice Porcupine | | `PORCUPINE_SENSITIVITY` | Нет | `0.8` | Чувствительность wake word | +| `AUDIO_INPUT_DEVICE_NAME` | Нет | auto | Подстрока имени микрофона (например `pulse`), если нужно выбрать конкретный input device | +| `AUDIO_INPUT_DEVICE_INDEX` | Нет | auto | Индекс PortAudio для микрофона (приоритетнее `AUDIO_INPUT_DEVICE_NAME`) | +| `AUDIO_OUTPUT_DEVICE_NAME` | Нет | auto | Подстрока имени динамика/выхода (например `pulse`) | +| `AUDIO_OUTPUT_DEVICE_INDEX` | Нет | auto | Индекс PortAudio для вывода (приоритетнее `AUDIO_OUTPUT_DEVICE_NAME`) | | `TTS_EN_SPEAKER` | Нет | `en_0` | Английский голос TTS | | `WEATHER_LAT` | Нет | - | Широта города по умолчанию | | `WEATHER_LON` | Нет | - | Долгота города по умолчанию | @@ -209,6 +219,7 @@ alexander_smart-speaker/ | Не реагирует на `Alexandr` | `PORCUPINE_ACCESS_KEY`, микрофон, чувствительность `PORCUPINE_SENSITIVITY` | | STT не распознает речь | `DEEPGRAM_API_KEY`, сетевой доступ, выбранный микрофон | | Нет звука | корректное аудиоустройство и доступность `pactl`/`amixer` | +| `Audio input/output initialization failed` | проверить, что звуковой сервер запущен (PipeWire/PulseAudio), и при необходимости задать `AUDIO_INPUT_DEVICE_NAME`/`AUDIO_OUTPUT_DEVICE_NAME` | | Будильник/таймер не звонит | наличие `mpg123` в системе | | Ошибка про несколько AI API | в `.env` должен остаться только один незакомментированный AI ключ | | Spotify не управляется | заполнены `SPOTIFY_*`, есть активное устройство, Premium-аккаунт | diff --git a/app/audio/stt.py b/app/audio/stt.py index aee87cd..851a97a 100644 --- a/app/audio/stt.py +++ b/app/audio/stt.py @@ -83,10 +83,12 @@ class SpeechRecognizer: def __init__(self): self.dg_client = None self.pa = None + self.audio_manager = None self.stream = None self.transcript = "" 
self.last_successful_operation = datetime.now() self._input_device_index = None + self._stream_sample_rate = SAMPLE_RATE def initialize(self): """Инициализация клиента Deepgram и PyAudio.""" @@ -103,9 +105,9 @@ class SpeechRecognizer: print(f"❌ Ошибка при создании клиента Deepgram: {e}") raise - audio_manager = get_audio_manager() - self.pa = audio_manager.get_pyaudio() - self._input_device_index = audio_manager.get_input_device_index() + self.audio_manager = get_audio_manager() + self.pa = self.audio_manager.get_pyaudio() + self._input_device_index = self.audio_manager.get_input_device_index() print("✅ Deepgram клиент готов") # Обновляем время последней успешной операции self.last_successful_operation = datetime.now() @@ -131,18 +133,23 @@ class SpeechRecognizer: def _get_stream(self): """Открывает аудиопоток PyAudio, если он еще не открыт.""" if self.stream is None: - kwargs = {} - if self._input_device_index is not None: - kwargs["input_device_index"] = self._input_device_index - - self.stream = self.pa.open( - rate=SAMPLE_RATE, - channels=1, - format=pyaudio.paInt16, - input=True, - frames_per_buffer=4096, - **kwargs, + if self.audio_manager is None: + self.audio_manager = get_audio_manager() + self.stream, self._input_device_index, self._stream_sample_rate = ( + self.audio_manager.open_input_stream( + rate=SAMPLE_RATE, + channels=1, + format=pyaudio.paInt16, + frames_per_buffer=4096, + preferred_index=self._input_device_index, + fallback_rates=[48000, 44100, 32000, 22050, 16000, 8000], + ) ) + if self._stream_sample_rate != SAMPLE_RATE: + print( + f"⚠️ STT mic stream uses fallback rate={self._stream_sample_rate} " + f"(requested {SAMPLE_RATE})" + ) return self.stream async def _process_audio( @@ -242,7 +249,7 @@ class SpeechRecognizer: smart_format=True, # Расстановка знаков препинания encoding="linear16", channels=1, - sample_rate=SAMPLE_RATE, + sample_rate=self._stream_sample_rate, interim_results=True, 
utterance_end_ms=int(POST_SPEECH_SILENCE_TIMEOUT_SECONDS * 1000), vad_events=True, diff --git a/app/audio/tts.py b/app/audio/tts.py index f91325c..077f435 100644 --- a/app/audio/tts.py +++ b/app/audio/tts.py @@ -14,9 +14,11 @@ import time import warnings import numpy as np +import pyaudio import sounddevice as sd import torch +from ..core.audio_manager import get_audio_manager from ..core.config import TTS_EN_SPEAKER, TTS_SAMPLE_RATE, TTS_SPEAKER # Подавляем предупреждения Silero о длинном тексте (мы сами его режем) @@ -36,6 +38,8 @@ class TextToSpeech: self.speaker_en = TTS_EN_SPEAKER self._interrupted = False self._stop_flag = threading.Event() + self._audio_manager = None + self._output_device_index = None def _load_model(self, language: str): """ @@ -232,14 +236,13 @@ class TextToSpeech: audio_np = audio.numpy() if check_interrupt: - # Воспроизведение с проверкой прерывания (сложная логика) - if not self._play_with_interrupt(audio_np, check_interrupt): + if not self._play_audio_with_interrupt(audio_np, check_interrupt): success = False break else: - # Обычное воспроизведение (блокирующее) - sd.play(audio_np, self.sample_rate) - sd.wait() + if not self._play_audio_blocking(audio_np): + success = False + break except Exception as e: print(f"❌ Ошибка TTS (часть {i + 1}/{total_chunks}): {e}") @@ -293,6 +296,75 @@ class TextToSpeech: text, check_interrupt=check_interrupt, language=language ) + def _resample_audio(self, audio_np: np.ndarray, src_rate: int, dst_rate: int): + if src_rate == dst_rate: + return audio_np.astype(np.float32, copy=False) + if audio_np.size == 0: + return np.asarray([], dtype=np.float32) + + target_length = max(1, int(round(audio_np.size * dst_rate / src_rate))) + x_old = np.arange(audio_np.size, dtype=np.float32) + x_new = np.linspace(0.0, float(max(0, audio_np.size - 1)), target_length) + resampled = np.interp(x_new, x_old, audio_np.astype(np.float32)) + return np.asarray(resampled, dtype=np.float32) + + def _play_audio_blocking(self, 
audio_np: np.ndarray) -> bool: + try: + sd.play(audio_np, self.sample_rate) + sd.wait() + return True + except Exception as exc: + print(f"⚠️ sounddevice playback failed, fallback to PyAudio: {exc}") + return self._play_with_pyaudio(audio_np, check_interrupt=None) + + def _play_audio_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool: + try: + return self._play_with_interrupt_sounddevice(audio_np, check_interrupt) + except Exception as exc: + print( + "⚠️ sounddevice playback-with-interrupt failed, fallback to PyAudio: " + f"{exc}" + ) + return self._play_with_pyaudio(audio_np, check_interrupt=check_interrupt) + + def _play_with_pyaudio(self, audio_np: np.ndarray, check_interrupt=None) -> bool: + if self._audio_manager is None: + self._audio_manager = get_audio_manager() + + output_stream = None + try: + output_stream, self._output_device_index, out_rate = ( + self._audio_manager.open_output_stream( + rate=self.sample_rate, + channels=1, + format=pyaudio.paFloat32, + preferred_index=self._output_device_index, + fallback_rates=[48000, 44100, 32000, 22050], + ) + ) + pcm = self._resample_audio(audio_np, self.sample_rate, out_rate) + chunk_size = max(256, int(out_rate * 0.03)) + + for offset in range(0, len(pcm), chunk_size): + if check_interrupt and check_interrupt(): + self._interrupted = True + return False + output_stream.write(pcm[offset : offset + chunk_size].tobytes()) + return True + except Exception as exc: + print(f"❌ PyAudio playback failed: {exc}") + return False + finally: + if output_stream is not None: + try: + output_stream.stop_stream() + except Exception: + pass + try: + output_stream.close() + except Exception: + pass + def _check_interrupt_worker(self, check_interrupt): """ Фоновая функция для потока: постоянно опрашивает check_interrupt. 
@@ -308,7 +380,9 @@ class TextToSpeech: except Exception: pass - def _play_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool: + def _play_with_interrupt_sounddevice( + self, audio_np: np.ndarray, check_interrupt + ) -> bool: """ Воспроизводит аудио, параллельно проверяя условие прерывания в отдельном потоке. """ diff --git a/app/audio/wakeword.py b/app/audio/wakeword.py index 9a1e32c..d834a8c 100644 --- a/app/audio/wakeword.py +++ b/app/audio/wakeword.py @@ -9,6 +9,7 @@ Listens for the "Alexandr" wake word. import pvporcupine import pyaudio import struct +import numpy as np from ..core.config import ( PORCUPINE_ACCESS_KEY, PORCUPINE_KEYWORD_PATH, @@ -24,6 +25,11 @@ class WakeWordDetector: self.porcupine = None self.audio_stream = None self.pa = None + self._audio_manager = None + self._input_device_index = None + self._capture_sample_rate = None + self._capture_frame_length = None + self._resampled_pcm_buffer = np.array([], dtype=np.int16) self._stream_closed = True # Флаг состояния потока (закрыт/открыт) self._last_hit_ts = 0.0 @@ -37,11 +43,13 @@ class WakeWordDetector: ) # Используем общий экземпляр PyAudio - audio_manager = get_audio_manager() - self.pa = audio_manager.get_pyaudio() - self._input_device_index = audio_manager.get_input_device_index() + self._audio_manager = get_audio_manager() + self.pa = self._audio_manager.get_pyaudio() self._open_stream() - print(f"🎤 Ожидание wake word 'Alexandr' (sens={PORCUPINE_SENSITIVITY:.2f})...") + print( + "🎤 Ожидание wake word 'Alexandr' " + f"(sens={PORCUPINE_SENSITIVITY:.2f}, mic_rate={self._capture_sample_rate})..." 
+ ) def _open_stream(self): """Открытие аудиопотока с микрофона.""" @@ -55,19 +63,28 @@ class WakeWordDetector: except Exception: pass - # Открываем поток с параметрами, которые требует Porcupine - kwargs = {} - if getattr(self, "_input_device_index", None) is not None: - kwargs["input_device_index"] = self._input_device_index - - self.audio_stream = self.pa.open( - rate=self.porcupine.sample_rate, + target_rate = int(self.porcupine.sample_rate) + fallback_rates = [48000, 44100, 32000, 22050, 16000] + self.audio_stream, self._input_device_index, actual_rate = self._audio_manager.open_input_stream( + rate=target_rate, channels=1, format=pyaudio.paInt16, - input=True, frames_per_buffer=self.porcupine.frame_length, - **kwargs, + preferred_index=self._input_device_index, + fallback_rates=fallback_rates, ) + self._capture_sample_rate = int(actual_rate) + self._capture_frame_length = max( + 64, + int( + round( + self.porcupine.frame_length + * self._capture_sample_rate + / target_rate + ) + ), + ) + self._resampled_pcm_buffer = np.array([], dtype=np.int16) self._stream_closed = False def stop_monitoring(self): @@ -80,6 +97,40 @@ class WakeWordDetector: pass self._stream_closed = True + def _resample_to_target_rate(self, pcm: np.ndarray) -> np.ndarray: + target_rate = int(self.porcupine.sample_rate) + source_rate = int(self._capture_sample_rate or target_rate) + if source_rate == target_rate: + return pcm + if pcm.size == 0: + return np.array([], dtype=np.int16) + target_length = max(1, int(round(pcm.size * target_rate / source_rate))) + x_old = np.arange(pcm.size, dtype=np.float32) + x_new = np.linspace(0.0, float(max(0, pcm.size - 1)), target_length) + resampled = np.interp(x_new, x_old, pcm.astype(np.float32)) + return np.asarray(resampled, dtype=np.int16) + + def _read_porcupine_frame(self): + target_length = int(self.porcupine.frame_length) + if self._capture_sample_rate == self.porcupine.sample_rate: + pcm = self.audio_stream.read(target_length, 
exception_on_overflow=False) + return np.asarray(struct.unpack_from("h" * target_length, pcm), dtype=np.int16) + + while self._resampled_pcm_buffer.size < target_length: + raw = self.audio_stream.read( + self._capture_frame_length, exception_on_overflow=False + ) + captured = np.frombuffer(raw, dtype=np.int16) + converted = self._resample_to_target_rate(captured) + if converted.size: + self._resampled_pcm_buffer = np.concatenate( + (self._resampled_pcm_buffer, converted) + ) + + frame = self._resampled_pcm_buffer[:target_length] + self._resampled_pcm_buffer = self._resampled_pcm_buffer[target_length:] + return frame + def wait_for_wakeword(self, timeout: float = None) -> bool: """ Блокирующая функция: ждет, пока не будет услышана фраза "Alexandr" @@ -107,14 +158,10 @@ class WakeWordDetector: return False # Читаем небольшой кусочек аудио (frame) - pcm = self.audio_stream.read( - self.porcupine.frame_length, exception_on_overflow=False - ) - # Конвертируем байты в кортеж чисел (требование Porcupine) - pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm) + pcm = self._read_porcupine_frame() # Обрабатываем фрейм через Porcupine - keyword_index = self.porcupine.process(pcm) + keyword_index = self.porcupine.process(pcm.tolist()) # Если keyword_index >= 0, значит ключевое слово обнаружено if keyword_index >= 0: @@ -140,12 +187,9 @@ class WakeWordDetector: try: self._open_stream() - pcm = self.audio_stream.read( - self.porcupine.frame_length, exception_on_overflow=False - ) - pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm) + pcm = self._read_porcupine_frame() - keyword_index = self.porcupine.process(pcm) + keyword_index = self.porcupine.process(pcm.tolist()) if keyword_index >= 0: now = time.time() if now - self._last_hit_ts < 0.2: # Уменьшаем интервал для более быстрой реакции diff --git a/app/core/audio_manager.py b/app/core/audio_manager.py index 47e21f3..93c228d 100644 --- a/app/core/audio_manager.py +++ b/app/core/audio_manager.py @@ -1,7 
+1,12 @@ import pyaudio import threading -from .config import AUDIO_INPUT_DEVICE_INDEX, AUDIO_INPUT_DEVICE_NAME +from .config import ( + AUDIO_INPUT_DEVICE_INDEX, + AUDIO_INPUT_DEVICE_NAME, + AUDIO_OUTPUT_DEVICE_INDEX, + AUDIO_OUTPUT_DEVICE_NAME, +) class AudioManager: @@ -14,7 +19,9 @@ class AudioManager: cls._instance = super(AudioManager, cls).__new__(cls) cls._instance.pa = pyaudio.PyAudio() cls._instance._input_device_index = None + cls._instance._output_device_index = None cls._instance._input_device_resolved = False + cls._instance._output_device_resolved = False print("🔊 AudioManager: PyAudio initialized (Global)") return cls._instance @@ -33,22 +40,84 @@ class AudioManager: self._input_device_resolved = True return self._input_device_index + def get_output_device_index(self): + """ + Returns PortAudio output device index or None (let PortAudio pick default). + Raises a RuntimeError with a helpful message if no output devices exist. + """ + if self._output_device_resolved: + return self._output_device_index + + self._output_device_index = self._resolve_output_device_index() + self._output_device_resolved = True + return self._output_device_index + + def _get_device_count(self) -> int: + if self.pa is None: + return 0 + return int(self.pa.get_device_count() or 0) + + def _is_input_device(self, idx: int) -> bool: + try: + info = self.pa.get_device_info_by_index(idx) + except Exception: + return False + return int(info.get("maxInputChannels") or 0) > 0 + + def _is_output_device(self, idx: int) -> bool: + try: + info = self.pa.get_device_info_by_index(idx) + except Exception: + return False + return int(info.get("maxOutputChannels") or 0) > 0 + + def _find_device_by_name(self, needle: str, input_kind: bool): + if not needle: + return None + lowered = needle.lower() + count = self._get_device_count() + for idx in range(count): + if input_kind and not self._is_input_device(idx): + continue + if not input_kind and not self._is_output_device(idx): + continue + try: 
+ name = str(self.pa.get_device_info_by_index(idx).get("name") or "") + except Exception: + continue + if lowered in name.lower(): + return idx + return None + + def _get_default_input_index(self): + try: + info = self.pa.get_default_input_device_info() + idx = int(info.get("index")) + if self._is_input_device(idx): + return idx + except Exception: + pass + return None + + def _get_default_output_index(self): + try: + info = self.pa.get_default_output_device_info() + idx = int(info.get("index")) + if self._is_output_device(idx): + return idx + except Exception: + pass + return None + def _resolve_input_device_index(self): if self.pa is None: return None - device_count = int(self.pa.get_device_count() or 0) - - def is_input_device(idx: int) -> bool: - try: - info = self.pa.get_device_info_by_index(idx) - except Exception: - return False - return int(info.get("maxInputChannels") or 0) > 0 + device_count = self._get_device_count() if AUDIO_INPUT_DEVICE_INDEX is not None: idx = int(AUDIO_INPUT_DEVICE_INDEX) - if 0 <= idx < device_count and is_input_device(idx): + if 0 <= idx < device_count and self._is_input_device(idx): return idx raise RuntimeError( "Audio input initialization failed: invalid AUDIO_INPUT_DEVICE_INDEX=" @@ -57,16 +126,9 @@ class AudioManager: ) if AUDIO_INPUT_DEVICE_NAME: - needle = AUDIO_INPUT_DEVICE_NAME.lower() - for idx in range(device_count): - if not is_input_device(idx): - continue - try: - name = str(self.pa.get_device_info_by_index(idx).get("name") or "") - except Exception: - continue - if needle in name.lower(): - return idx + match_idx = self._find_device_by_name(AUDIO_INPUT_DEVICE_NAME, input_kind=True) + if match_idx is not None: + return match_idx raise RuntimeError( "Audio input initialization failed: could not find an input device " @@ -76,17 +138,13 @@ class AudioManager: ) # Default input device (if PortAudio has one). 
- try: - default_info = self.pa.get_default_input_device_info() - default_idx = int(default_info.get("index")) - if 0 <= default_idx < device_count and is_input_device(default_idx): - return default_idx - except Exception: - pass + default_idx = self._get_default_input_index() + if default_idx is not None: + return default_idx # Fallback: first input device. for idx in range(device_count): - if is_input_device(idx): + if self._is_input_device(idx): return idx raise RuntimeError( @@ -96,12 +154,185 @@ class AudioManager: + self.describe_input_devices() ) + def _resolve_output_device_index(self): + if self.pa is None: + return None + + device_count = self._get_device_count() + + if AUDIO_OUTPUT_DEVICE_INDEX is not None: + idx = int(AUDIO_OUTPUT_DEVICE_INDEX) + if 0 <= idx < device_count and self._is_output_device(idx): + return idx + raise RuntimeError( + "Audio output initialization failed: invalid AUDIO_OUTPUT_DEVICE_INDEX=" + f"{AUDIO_OUTPUT_DEVICE_INDEX}. Available output devices:\n" + + self.describe_output_devices() + ) + + if AUDIO_OUTPUT_DEVICE_NAME: + match_idx = self._find_device_by_name( + AUDIO_OUTPUT_DEVICE_NAME, input_kind=False + ) + if match_idx is not None: + return match_idx + raise RuntimeError( + "Audio output initialization failed: could not find an output device " + f"matching AUDIO_OUTPUT_DEVICE_NAME={AUDIO_OUTPUT_DEVICE_NAME!r}. " + "Available output devices:\n" + + self.describe_output_devices() + ) + + default_idx = self._get_default_output_index() + if default_idx is not None: + return default_idx + + for idx in range(device_count): + if self._is_output_device(idx): + return idx + + raise RuntimeError( + "Audio output initialization failed: no output devices found. " + "Check speaker connection and PipeWire/PulseAudio. 
" + "PortAudio devices:\n" + + self.describe_output_devices() + ) + + def _ordered_input_candidates(self, preferred_index=None): + candidates = [] + + def add(idx): + if idx not in candidates: + candidates.append(idx) + + if preferred_index is not None: + add(preferred_index) + else: + try: + add(self.get_input_device_index()) + except Exception: + pass + add(self._get_default_input_index()) + add(None) # Let PortAudio decide default path. + for idx in range(self._get_device_count()): + if self._is_input_device(idx): + add(idx) + + return [idx for idx in candidates if idx is None or self._is_input_device(idx)] + + def _ordered_output_candidates(self, preferred_index=None): + candidates = [] + + def add(idx): + if idx not in candidates: + candidates.append(idx) + + if preferred_index is not None: + add(preferred_index) + else: + try: + add(self.get_output_device_index()) + except Exception: + pass + add(self._get_default_output_index()) + add(None) # Let PortAudio decide default path. + for idx in range(self._get_device_count()): + if self._is_output_device(idx): + add(idx) + + return [idx for idx in candidates if idx is None or self._is_output_device(idx)] + + def open_input_stream( + self, + *, + rate: int, + channels: int, + format, + frames_per_buffer: int, + preferred_index=None, + fallback_rates=None, + ): + if self.pa is None: + raise RuntimeError("PyAudio is not initialized") + + fallback_rates = fallback_rates or [] + rates = [int(rate)] + [int(r) for r in fallback_rates if int(r) > 0 and int(r) != int(rate)] + errors = [] + + for device_idx in self._ordered_input_candidates(preferred_index): + for attempt_rate in rates: + fb = max( + 64, int(round(frames_per_buffer * attempt_rate / max(1, int(rate)))) + ) + kwargs = { + "rate": attempt_rate, + "channels": channels, + "format": format, + "input": True, + "frames_per_buffer": fb, + } + if device_idx is not None: + kwargs["input_device_index"] = device_idx + try: + stream = self.pa.open(**kwargs) + return 
stream, device_idx, attempt_rate + except Exception as exc: + errors.append( + f"device={device_idx!r}, rate={attempt_rate}: {exc}" + ) + + joined_errors = "\n".join(errors[:12]) + raise RuntimeError( + "Audio input initialization failed. Tried multiple devices/rates.\n" + f"{joined_errors}\nAvailable input devices:\n{self.describe_input_devices()}" + ) + + def open_output_stream( + self, + *, + rate: int, + channels: int, + format, + preferred_index=None, + fallback_rates=None, + ): + if self.pa is None: + raise RuntimeError("PyAudio is not initialized") + + fallback_rates = fallback_rates or [] + rates = [int(rate)] + [int(r) for r in fallback_rates if int(r) > 0 and int(r) != int(rate)] + errors = [] + + for device_idx in self._ordered_output_candidates(preferred_index): + for attempt_rate in rates: + kwargs = { + "rate": attempt_rate, + "channels": channels, + "format": format, + "output": True, + } + if device_idx is not None: + kwargs["output_device_index"] = device_idx + try: + stream = self.pa.open(**kwargs) + return stream, device_idx, attempt_rate + except Exception as exc: + errors.append( + f"device={device_idx!r}, rate={attempt_rate}: {exc}" + ) + + joined_errors = "\n".join(errors[:12]) + raise RuntimeError( + "Audio output initialization failed. 
Tried multiple devices/rates.\n" + f"{joined_errors}\nAvailable output devices:\n{self.describe_output_devices()}" + ) + def describe_input_devices(self, limit: int = 20) -> str: if self.pa is None: return "" items = [] - count = int(self.pa.get_device_count() or 0) + count = self._get_device_count() for idx in range(count): try: info = self.pa.get_device_info_by_index(idx) @@ -116,6 +347,26 @@ class AudioManager: break return "\n".join(items) if items else "" + def describe_output_devices(self, limit: int = 20) -> str: + if self.pa is None: + return "" + + items = [] + count = self._get_device_count() + for idx in range(count): + try: + info = self.pa.get_device_info_by_index(idx) + except Exception: + continue + max_out = int(info.get("maxOutputChannels") or 0) + if max_out <= 0: + continue + name = str(info.get("name") or "").strip() + items.append(f"[{idx}] {name} (out={max_out})") + if len(items) >= limit: + break + return "\n".join(items) if items else "" + def cleanup(self): if self.pa: self.pa.terminate() diff --git a/app/core/config.py b/app/core/config.py index 36bdba4..28c15c6 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -88,6 +88,20 @@ try: except Exception: AUDIO_INPUT_DEVICE_INDEX = None +# Выбор устройства вывода (динамик). +# Если не задано, используем default output device PortAudio (если есть). +# Пример: +# - AUDIO_OUTPUT_DEVICE_NAME=pulse +# - AUDIO_OUTPUT_DEVICE_INDEX=5 +AUDIO_OUTPUT_DEVICE_NAME = os.getenv("AUDIO_OUTPUT_DEVICE_NAME", "").strip() or None +_audio_out_index_raw = os.getenv("AUDIO_OUTPUT_DEVICE_INDEX", "").strip() +try: + AUDIO_OUTPUT_DEVICE_INDEX = ( + int(_audio_out_index_raw) if _audio_out_index_raw else None + ) +except Exception: + AUDIO_OUTPUT_DEVICE_INDEX = None + # --- Настройка времени --- # Устанавливаем часовой пояс на Москву, чтобы будильник работал корректно