feat: harden audio device compatibility across machines

2026-03-12 14:08:20 +03:00
parent e9f26f8050
commit 6c2702d5e3
7 changed files with 480 additions and 74 deletions
--- a/app/audio/stt.py
+++ b/app/audio/stt.py
@@ -83,10 +83,12 @@ class SpeechRecognizer:
    def __init__(self):
        self.dg_client = None
        self.pa = None
+        self.audio_manager = None
        self.stream = None
        self.transcript = ""
        self.last_successful_operation = datetime.now()
        self._input_device_index = None
+        self._stream_sample_rate = SAMPLE_RATE

    def initialize(self):
        """Инициализация клиента Deepgram и PyAudio."""
@@ -103,9 +105,9 @@ class SpeechRecognizer:
            print(f"❌ Ошибка при создании клиента Deepgram: {e}")
            raise

-        audio_manager = get_audio_manager()
-        self.pa = audio_manager.get_pyaudio()
-        self._input_device_index = audio_manager.get_input_device_index()
+        self.audio_manager = get_audio_manager()
+        self.pa = self.audio_manager.get_pyaudio()
+        self._input_device_index = self.audio_manager.get_input_device_index()
        print("✅ Deepgram клиент готов")
        # Обновляем время последней успешной операции
        self.last_successful_operation = datetime.now()
@@ -131,18 +133,23 @@ class SpeechRecognizer:
    def _get_stream(self):
        """Открывает аудиопоток PyAudio, если он еще не открыт."""
        if self.stream is None:
-            kwargs = {}
-            if self._input_device_index is not None:
-                kwargs["input_device_index"] = self._input_device_index
-
-            self.stream = self.pa.open(
-                rate=SAMPLE_RATE,
-                channels=1,
-                format=pyaudio.paInt16,
-                input=True,
-                frames_per_buffer=4096,
-                **kwargs,
+            if self.audio_manager is None:
+                self.audio_manager = get_audio_manager()
+            self.stream, self._input_device_index, self._stream_sample_rate = (
+                self.audio_manager.open_input_stream(
+                    rate=SAMPLE_RATE,
+                    channels=1,
+                    format=pyaudio.paInt16,
+                    frames_per_buffer=4096,
+                    preferred_index=self._input_device_index,
+                    fallback_rates=[48000, 44100, 32000, 22050, 16000, 8000],
+                )
            )
+            if self._stream_sample_rate != SAMPLE_RATE:
+                print(
+                    f"⚠️ STT mic stream uses fallback rate={self._stream_sample_rate} "
+                    f"(requested {SAMPLE_RATE})"
+                )
        return self.stream

    async def _process_audio(
@@ -242,7 +249,7 @@ class SpeechRecognizer:
            smart_format=True,  # Расстановка знаков препинания
            encoding="linear16",
            channels=1,
-            sample_rate=SAMPLE_RATE,
+            sample_rate=self._stream_sample_rate,
            interim_results=True,
            utterance_end_ms=int(POST_SPEECH_SILENCE_TIMEOUT_SECONDS * 1000),
            vad_events=True,
--- a/app/audio/tts.py
+++ b/app/audio/tts.py
@@ -14,9 +14,11 @@ import time
 import warnings

 import numpy as np
+import pyaudio
 import sounddevice as sd
 import torch

+from ..core.audio_manager import get_audio_manager
 from ..core.config import TTS_EN_SPEAKER, TTS_SAMPLE_RATE, TTS_SPEAKER

 # Подавляем предупреждения Silero о длинном тексте (мы сами его режем)
@@ -36,6 +38,8 @@ class TextToSpeech:
        self.speaker_en = TTS_EN_SPEAKER
        self._interrupted = False
        self._stop_flag = threading.Event()
+        self._audio_manager = None
+        self._output_device_index = None

    def _load_model(self, language: str):
        """
@@ -232,14 +236,13 @@ class TextToSpeech:
                audio_np = audio.numpy()

                if check_interrupt:
-                    # Воспроизведение с проверкой прерывания (сложная логика)
-                    if not self._play_with_interrupt(audio_np, check_interrupt):
+                    if not self._play_audio_with_interrupt(audio_np, check_interrupt):
                        success = False
                        break
                else:
-                    # Обычное воспроизведение (блокирующее)
-                    sd.play(audio_np, self.sample_rate)
-                    sd.wait()
+                    if not self._play_audio_blocking(audio_np):
+                        success = False
+                        break

            except Exception as e:
                print(f"❌ Ошибка TTS (часть {i + 1}/{total_chunks}): {e}")
@@ -293,6 +296,75 @@ class TextToSpeech:
            text, check_interrupt=check_interrupt, language=language
        )

+    def _resample_audio(self, audio_np: np.ndarray, src_rate: int, dst_rate: int):
+        """Return audio_np as float32, linearly interpolated from src_rate to dst_rate."""
+        if src_rate == dst_rate:
+            return audio_np.astype(np.float32, copy=False)
+        n_in = audio_np.size
+        if n_in == 0:
+            return np.empty(0, dtype=np.float32)
+        n_out = max(1, int(round(n_in * dst_rate / src_rate)))
+        grid = np.linspace(0.0, float(max(0, n_in - 1)), n_out)
+        values = np.interp(grid, np.arange(n_in, dtype=np.float32), audio_np.astype(np.float32))
+        return np.asarray(values, dtype=np.float32)
+
+    def _play_audio_blocking(self, audio_np: np.ndarray) -> bool:
+        try:  # sounddevice is tried first; PyAudio is used only if it raises
+            sd.play(audio_np, self.sample_rate)
+            sd.wait()
+        except Exception as err:
+            print(f"⚠️ sounddevice playback failed, fallback to PyAudio: {err}")
+            return self._play_with_pyaudio(audio_np, check_interrupt=None)
+        return True
+
+    def _play_audio_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
+        """Play audio_np interruptibly via sounddevice, falling back to PyAudio.
+
+        The fallback runs only when the sounddevice path raises."""
+        try:
+            return self._play_with_interrupt_sounddevice(audio_np, check_interrupt)
+        except Exception as err:
+            print(f"⚠️ sounddevice playback-with-interrupt failed, fallback to PyAudio: {err}")
+        return self._play_with_pyaudio(audio_np, check_interrupt=check_interrupt)
+
+    def _play_with_pyaudio(self, audio_np: np.ndarray, check_interrupt=None) -> bool:
+        """Play audio_np through a PyAudio output stream opened by the audio manager.
+
+        Returns True after all chunks are written; False on interrupt or any error.
+        """
+        if self._audio_manager is None:
+            self._audio_manager = get_audio_manager()
+        stream = None
+        try:
+            opened = self._audio_manager.open_output_stream(
+                rate=self.sample_rate,
+                channels=1,
+                format=pyaudio.paFloat32,
+                preferred_index=self._output_device_index,
+                fallback_rates=[48000, 44100, 32000, 22050],
+            )
+            stream, self._output_device_index, out_rate = opened
+            samples = self._resample_audio(audio_np, self.sample_rate, out_rate)
+            step = max(256, int(out_rate * 0.03))  # ~30 ms per write, at least 256 frames
+            start = 0
+            while start < len(samples):
+                if check_interrupt and check_interrupt():
+                    self._interrupted = True
+                    return False
+                stream.write(samples[start : start + step].tobytes())
+                start += step
+            return True
+        except Exception as err:
+            print(f"❌ PyAudio playback failed: {err}")
+            return False
+        finally:
+            if stream is not None:
+                for closer in ("stop_stream", "close"):
+                    try:
+                        getattr(stream, closer)()
+                    except Exception:
+                        pass
+
    def _check_interrupt_worker(self, check_interrupt):
        """
        Фоновая функция для потока: постоянно опрашивает check_interrupt.
@@ -308,7 +380,9 @@ class TextToSpeech:
            except Exception:
                pass

-    def _play_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
+    def _play_with_interrupt_sounddevice(
+        self, audio_np: np.ndarray, check_interrupt
+    ) -> bool:
        """
        Воспроизводит аудио, параллельно проверяя условие прерывания в отдельном потоке.
        """
--- a/app/audio/wakeword.py
+++ b/app/audio/wakeword.py
@@ -9,6 +9,7 @@ Listens for the "Alexandr" wake word.
 import pvporcupine
 import pyaudio
 import struct
+import numpy as np
 from ..core.config import (
    PORCUPINE_ACCESS_KEY,
    PORCUPINE_KEYWORD_PATH,
@@ -24,6 +25,11 @@ class WakeWordDetector:
        self.porcupine = None
        self.audio_stream = None
        self.pa = None
+        self._audio_manager = None
+        self._input_device_index = None
+        self._capture_sample_rate = None
+        self._capture_frame_length = None
+        self._resampled_pcm_buffer = np.array([], dtype=np.int16)
        self._stream_closed = True  # Флаг состояния потока (закрыт/открыт)
        self._last_hit_ts = 0.0

@@ -37,11 +43,13 @@ class WakeWordDetector:
        )

        # Используем общий экземпляр PyAudio
-        audio_manager = get_audio_manager()
-        self.pa = audio_manager.get_pyaudio()
-        self._input_device_index = audio_manager.get_input_device_index()
+        self._audio_manager = get_audio_manager()
+        self.pa = self._audio_manager.get_pyaudio()
        self._open_stream()
-        print(f"🎤 Ожидание wake word 'Alexandr' (sens={PORCUPINE_SENSITIVITY:.2f})...")
+        print(
+            "🎤 Ожидание wake word 'Alexandr' "
+            f"(sens={PORCUPINE_SENSITIVITY:.2f}, mic_rate={self._capture_sample_rate})..."
+        )

    def _open_stream(self):
        """Открытие аудиопотока с микрофона."""
@@ -55,19 +63,28 @@ class WakeWordDetector:
            except Exception:
                pass

-        # Открываем поток с параметрами, которые требует Porcupine
-        kwargs = {}
-        if getattr(self, "_input_device_index", None) is not None:
-            kwargs["input_device_index"] = self._input_device_index
-
-        self.audio_stream = self.pa.open(
-            rate=self.porcupine.sample_rate,
+        target_rate = int(self.porcupine.sample_rate)
+        fallback_rates = [48000, 44100, 32000, 22050, 16000]
+        self.audio_stream, self._input_device_index, actual_rate = self._audio_manager.open_input_stream(
+            rate=target_rate,
            channels=1,
            format=pyaudio.paInt16,
-            input=True,
            frames_per_buffer=self.porcupine.frame_length,
-            **kwargs,
+            preferred_index=self._input_device_index,
+            fallback_rates=fallback_rates,
        )
+        self._capture_sample_rate = int(actual_rate)
+        self._capture_frame_length = max(
+            64,
+            int(
+                round(
+                    self.porcupine.frame_length
+                    * self._capture_sample_rate
+                    / target_rate
+                )
+            ),
+        )
+        self._resampled_pcm_buffer = np.array([], dtype=np.int16)
        self._stream_closed = False

    def stop_monitoring(self):
@@ -80,6 +97,40 @@ class WakeWordDetector:
                pass
            self._stream_closed = True

+    def _resample_to_target_rate(self, pcm: np.ndarray) -> np.ndarray:
+        """Linearly resample a captured int16 chunk to Porcupine's sample rate."""
+        target_rate = int(self.porcupine.sample_rate)
+        source_rate = int(self._capture_sample_rate or target_rate)
+        if source_rate == target_rate or pcm.size == 0:
+            return pcm
+        target_length = max(1, int(round(pcm.size * target_rate / source_rate)))
+        # Sample at a fixed source_rate/target_rate step so spacing stays uniform across
+        # consecutive chunks (stretching each chunk end-to-end jitters every boundary).
+        positions = np.arange(target_length, dtype=np.float64) * (source_rate / target_rate)
+        resampled = np.interp(positions, np.arange(pcm.size), pcm.astype(np.float64))
+        return np.rint(resampled).astype(np.int16)  # round instead of truncating
+
+    def _read_porcupine_frame(self):
+        """Return one Porcupine-length int16 frame, resampling mic audio when needed."""
+        frame_len = int(self.porcupine.frame_length)
+        if self._capture_sample_rate == self.porcupine.sample_rate:
+            data = self.audio_stream.read(frame_len, exception_on_overflow=False)
+            return np.asarray(struct.unpack_from("h" * frame_len, data), dtype=np.int16)
+        # Fallback rate: keep reading and resampling until a whole frame is buffered.
+        while self._resampled_pcm_buffer.size < frame_len:
+            raw = self.audio_stream.read(
+                self._capture_frame_length, exception_on_overflow=False
+            )
+            converted = self._resample_to_target_rate(np.frombuffer(raw, dtype=np.int16))
+            if converted.size == 0:
+                continue
+            self._resampled_pcm_buffer = np.concatenate(
+                (self._resampled_pcm_buffer, converted)
+            )
+        pending = self._resampled_pcm_buffer
+        self._resampled_pcm_buffer = pending[frame_len:]  # keep the surplus for next call
+        return pending[:frame_len]
+
    def wait_for_wakeword(self, timeout: float = None) -> bool:
        """
        Блокирующая функция: ждет, пока не будет услышана фраза "Alexandr"
@@ -107,14 +158,10 @@ class WakeWordDetector:
                return False

            # Читаем небольшой кусочек аудио (frame)
-            pcm = self.audio_stream.read(
-                self.porcupine.frame_length, exception_on_overflow=False
-            )
-            # Конвертируем байты в кортеж чисел (требование Porcupine)
-            pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
+            pcm = self._read_porcupine_frame()

            # Обрабатываем фрейм через Porcupine
-            keyword_index = self.porcupine.process(pcm)
+            keyword_index = self.porcupine.process(pcm.tolist())

            # Если keyword_index >= 0, значит ключевое слово обнаружено
            if keyword_index >= 0:
@@ -140,12 +187,9 @@ class WakeWordDetector:
        try:
            self._open_stream()

-            pcm = self.audio_stream.read(
-                self.porcupine.frame_length, exception_on_overflow=False
-            )
-            pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
+            pcm = self._read_porcupine_frame()

-            keyword_index = self.porcupine.process(pcm)
+            keyword_index = self.porcupine.process(pcm.tolist())
            if keyword_index >= 0:
                now = time.time()
                if now - self._last_hit_ts < 0.2:  # Уменьшаем интервал для более быстрой реакции
--- a/app/core/audio_manager.py
+++ b/app/core/audio_manager.py
@@ -1,7 +1,12 @@
 import pyaudio
 import threading

-from .config import AUDIO_INPUT_DEVICE_INDEX, AUDIO_INPUT_DEVICE_NAME
+from .config import (
+    AUDIO_INPUT_DEVICE_INDEX,
+    AUDIO_INPUT_DEVICE_NAME,
+    AUDIO_OUTPUT_DEVICE_INDEX,
+    AUDIO_OUTPUT_DEVICE_NAME,
+)


 class AudioManager:
@@ -14,7 +19,9 @@ class AudioManager:
                cls._instance = super(AudioManager, cls).__new__(cls)
                cls._instance.pa = pyaudio.PyAudio()
                cls._instance._input_device_index = None
+                cls._instance._output_device_index = None
                cls._instance._input_device_resolved = False
+                cls._instance._output_device_resolved = False
                print("🔊 AudioManager: PyAudio initialized (Global)")
            return cls._instance

@@ -33,22 +40,84 @@ class AudioManager:
        self._input_device_resolved = True
        return self._input_device_index

+    def get_output_device_index(self):
+        """
+        Resolve the PortAudio output device once and cache the result.
+
+        Returns the device index, or None when resolution yields None.
+        A RuntimeError raised during resolution propagates and is not cached.
+        """
+        if not self._output_device_resolved:
+            self._output_device_index = self._resolve_output_device_index()
+            self._output_device_resolved = True
+        return self._output_device_index
+
+    def _get_device_count(self) -> int:
+        """Number of PortAudio devices; 0 when PyAudio is unavailable or reports none."""
+        backend = self.pa
+        return 0 if backend is None else int(backend.get_device_count() or 0)
+
+    def _is_input_device(self, idx: int) -> bool:
+        try:  # a failed lookup means idx is not a usable device
+            details = self.pa.get_device_info_by_index(idx)
+        except Exception:
+            return False
+        return 0 < int(details.get("maxInputChannels") or 0)
+
+    def _is_output_device(self, idx: int) -> bool:
+        try:  # a failed lookup means idx is not a usable device
+            details = self.pa.get_device_info_by_index(idx)
+        except Exception:
+            return False
+        return 0 < int(details.get("maxOutputChannels") or 0)
+
+    def _find_device_by_name(self, needle: str, input_kind: bool):
+        """Return the index of the first matching input/output device, or None."""
+        if not needle:
+            return None
+        wanted = needle.lower()
+        # Case-insensitive substring match, limited to devices of the requested kind.
+        kind_matches = self._is_input_device if input_kind else self._is_output_device
+        for candidate in range(self._get_device_count()):
+            if not kind_matches(candidate):
+                continue
+            try:
+                label = str(self.pa.get_device_info_by_index(candidate).get("name") or "")
+            except Exception:
+                continue
+            if wanted in label.lower():
+                return candidate
+        return None
+
+    def _get_default_input_index(self):
+        """Return PortAudio's default input device index, or None if unusable."""
+        try:
+            default_idx = int(self.pa.get_default_input_device_info().get("index"))
+            usable = self._is_input_device(default_idx)
+        except Exception:
+            # Any failure while querying the default is treated as "no default".
+            return None
+        return default_idx if usable else None
+
+    def _get_default_output_index(self):
+        """Return PortAudio's default output device index, or None if unusable."""
+        try:
+            default_idx = int(self.pa.get_default_output_device_info().get("index"))
+            usable = self._is_output_device(default_idx)
+        except Exception:
+            # Any failure while querying the default is treated as "no default".
+            return None
+        return default_idx if usable else None
+
    def _resolve_input_device_index(self):
        if self.pa is None:
            return None

-        device_count = int(self.pa.get_device_count() or 0)
-
-        def is_input_device(idx: int) -> bool:
-            try:
-                info = self.pa.get_device_info_by_index(idx)
-            except Exception:
-                return False
-            return int(info.get("maxInputChannels") or 0) > 0
+        device_count = self._get_device_count()

        if AUDIO_INPUT_DEVICE_INDEX is not None:
            idx = int(AUDIO_INPUT_DEVICE_INDEX)
-            if 0 <= idx < device_count and is_input_device(idx):
+            if 0 <= idx < device_count and self._is_input_device(idx):
                return idx
            raise RuntimeError(
                "Audio input initialization failed: invalid AUDIO_INPUT_DEVICE_INDEX="
@@ -57,16 +126,9 @@ class AudioManager:
            )

        if AUDIO_INPUT_DEVICE_NAME:
-            needle = AUDIO_INPUT_DEVICE_NAME.lower()
-            for idx in range(device_count):
-                if not is_input_device(idx):
-                    continue
-                try:
-                    name = str(self.pa.get_device_info_by_index(idx).get("name") or "")
-                except Exception:
-                    continue
-                if needle in name.lower():
-                    return idx
+            match_idx = self._find_device_by_name(AUDIO_INPUT_DEVICE_NAME, input_kind=True)
+            if match_idx is not None:
+                return match_idx

            raise RuntimeError(
                "Audio input initialization failed: could not find an input device "
@@ -76,17 +138,13 @@ class AudioManager:
            )

        # Default input device (if PortAudio has one).
-        try:
-            default_info = self.pa.get_default_input_device_info()
-            default_idx = int(default_info.get("index"))
-            if 0 <= default_idx < device_count and is_input_device(default_idx):
-                return default_idx
-        except Exception:
-            pass
+        default_idx = self._get_default_input_index()
+        if default_idx is not None:
+            return default_idx

        # Fallback: first input device.
        for idx in range(device_count):
-            if is_input_device(idx):
+            if self._is_input_device(idx):
                return idx

        raise RuntimeError(
@@ -96,12 +154,185 @@ class AudioManager:
            + self.describe_input_devices()
        )

+    def _resolve_output_device_index(self):
+        """Choose the output device: configured index, configured name, default, first found.
+
+        Returns None when PyAudio is absent. Raises RuntimeError if a configured
+        device is invalid or missing, or if no output device exists at all.
+        """
+        if self.pa is None:
+            return None
+        total = self._get_device_count()
+
+        # An explicitly configured index must be valid; otherwise fail loudly.
+        if AUDIO_OUTPUT_DEVICE_INDEX is not None:
+            configured = int(AUDIO_OUTPUT_DEVICE_INDEX)
+            if 0 <= configured < total and self._is_output_device(configured):
+                return configured
+            raise RuntimeError(
+                "Audio output initialization failed: invalid AUDIO_OUTPUT_DEVICE_INDEX="
+                f"{AUDIO_OUTPUT_DEVICE_INDEX}. Available output devices:\n"
+                + self.describe_output_devices()
+            )
+        # Same for an explicitly configured (substring) name.
+        if AUDIO_OUTPUT_DEVICE_NAME:
+            by_name = self._find_device_by_name(AUDIO_OUTPUT_DEVICE_NAME, input_kind=False)
+            if by_name is not None:
+                return by_name
+            raise RuntimeError(
+                "Audio output initialization failed: could not find an output device "
+                f"matching AUDIO_OUTPUT_DEVICE_NAME={AUDIO_OUTPUT_DEVICE_NAME!r}. "
+                "Available output devices:\n"
+                + self.describe_output_devices()
+            )
+        # Nothing configured: PortAudio's default, else the first output-capable device.
+        chosen = self._get_default_output_index()
+        if chosen is None:
+            chosen = next((i for i in range(total) if self._is_output_device(i)), None)
+        if chosen is not None:
+            return chosen
+        raise RuntimeError(
+            "Audio output initialization failed: no output devices found. "
+            "Check speaker connection and PipeWire/PulseAudio. "
+            "PortAudio devices:\n"
+            + self.describe_output_devices()
+        )
+
+    def _ordered_input_candidates(self, preferred_index=None):
+        """Return input device indices to try, in priority order.
+
+        Order: preferred_index (if given), the resolved/configured device,
+        PortAudio's default, None (let PortAudio choose), then every other
+        input device. A preferred device no longer suppresses the fallbacks,
+        so a device that disappeared or rejects all rates is not fatal.
+        """
+        ordered = [] if preferred_index is None else [preferred_index]
+        try:
+            ordered.append(self.get_input_device_index())
+        except Exception:
+            # Best effort: an unresolvable configured device is simply not a candidate.
+            pass
+        ordered.append(self._get_default_input_index())
+        ordered.append(None)  # Let PortAudio decide default path.
+        ordered.extend(range(self._get_device_count()))
+
+        # De-duplicate keeping first occurrence, then drop non-input indices.
+        unique = dict.fromkeys(ordered)
+        return [idx for idx in unique if idx is None or self._is_input_device(idx)]
+
+    def _ordered_output_candidates(self, preferred_index=None):
+        """Return output device indices to try, in priority order.
+
+        Order: preferred_index (if given), the resolved/configured device,
+        PortAudio's default, None (let PortAudio choose), then every other
+        output device. A preferred device no longer suppresses the fallbacks,
+        so a device that disappeared or rejects all rates is not fatal.
+        """
+        ordered = [] if preferred_index is None else [preferred_index]
+        try:
+            ordered.append(self.get_output_device_index())
+        except Exception:
+            # Best effort: an unresolvable configured device is simply not a candidate.
+            pass
+        ordered.append(self._get_default_output_index())
+        ordered.append(None)  # Let PortAudio decide default path.
+        ordered.extend(range(self._get_device_count()))
+
+        # De-duplicate keeping first occurrence, then drop non-output indices.
+        unique = dict.fromkeys(ordered)
+        return [idx for idx in unique if idx is None or self._is_output_device(idx)]
+
+    def open_input_stream(
+        self,
+        *,
+        rate: int,
+        channels: int,
+        format,
+        frames_per_buffer: int,
+        preferred_index=None,
+        fallback_rates=None,
+    ):
+        """Open a capture stream, trying candidate devices and sample rates in order.
+
+        Returns (stream, device_index, actual_rate); a device_index of None means no
+        explicit device was passed to PyAudio. Raises RuntimeError if all attempts fail.
+        """
+        if self.pa is None:
+            raise RuntimeError("PyAudio is not initialized")
+
+        wanted = int(rate)
+        extras = [int(r) for r in (fallback_rates or []) if int(r) > 0 and int(r) != wanted]
+        failures = []
+        for device_idx in self._ordered_input_candidates(preferred_index):
+            for attempt_rate in [wanted, *extras]:
+                # Scale the buffer with the rate so it covers the same duration.
+                scaled = int(round(frames_per_buffer * attempt_rate / max(1, wanted)))
+                params = dict(
+                    rate=attempt_rate,
+                    channels=channels,
+                    format=format,
+                    input=True,
+                    frames_per_buffer=max(64, scaled),
+                )
+                if device_idx is not None:
+                    params["input_device_index"] = device_idx
+                try:
+                    return self.pa.open(**params), device_idx, attempt_rate
+                except Exception as exc:
+                    failures.append(f"device={device_idx!r}, rate={attempt_rate}: {exc}")
+
+        joined_errors = "\n".join(failures[:12])
+        raise RuntimeError(
+            "Audio input initialization failed. Tried multiple devices/rates.\n"
+            f"{joined_errors}\nAvailable input devices:\n{self.describe_input_devices()}"
+        )
+
+    def open_output_stream(
+        self,
+        *,
+        rate: int,
+        channels: int,
+        format,
+        preferred_index=None,
+        fallback_rates=None,
+    ):
+        """Open a playback stream, trying candidate devices and sample rates in order.
+
+        Returns (stream, device_index, actual_rate); a device_index of None means no
+        explicit device was passed to PyAudio. Raises RuntimeError if all attempts fail.
+        """
+        if self.pa is None:
+            raise RuntimeError("PyAudio is not initialized")
+        wanted = int(rate)
+        extras = [int(r) for r in (fallback_rates or []) if int(r) > 0 and int(r) != wanted]
+        failures = []
+        for device_idx in self._ordered_output_candidates(preferred_index):
+            for attempt_rate in [wanted, *extras]:
+                params = dict(
+                    rate=attempt_rate,
+                    channels=channels,
+                    format=format,
+                    output=True,
+                )
+                if device_idx is not None:
+                    params["output_device_index"] = device_idx
+                try:
+                    return self.pa.open(**params), device_idx, attempt_rate
+                except Exception as exc:
+                    failures.append(f"device={device_idx!r}, rate={attempt_rate}: {exc}")
+
+        joined_errors = "\n".join(failures[:12])
+        raise RuntimeError(
+            "Audio output initialization failed. Tried multiple devices/rates.\n"
+            f"{joined_errors}\nAvailable output devices:\n{self.describe_output_devices()}"
+        )
+
    def describe_input_devices(self, limit: int = 20) -> str:
        if self.pa is None:
            return "<PyAudio not initialized>"

        items = []
-        count = int(self.pa.get_device_count() or 0)
+        count = self._get_device_count()
        for idx in range(count):
            try:
                info = self.pa.get_device_info_by_index(idx)
@@ -116,6 +347,26 @@ class AudioManager:
                break
        return "\n".join(items) if items else "<no input devices>"

+    def describe_output_devices(self, limit: int = 20) -> str:
+        """List output-capable devices, one "[idx] name (out=N)" line each.
+
+        Stops once `limit` entries are collected; unreadable devices are skipped."""
+        if self.pa is None:
+            return "<PyAudio not initialized>"
+        lines = []
+        for device_idx in range(self._get_device_count()):
+            try:
+                details = self.pa.get_device_info_by_index(device_idx)
+            except Exception:
+                continue
+            out_channels = int(details.get("maxOutputChannels") or 0)
+            if out_channels > 0:
+                label = str(details.get("name") or "").strip()
+                lines.append(f"[{device_idx}] {label} (out={out_channels})")
+                if len(lines) >= limit:
+                    break
+        return "\n".join(lines) or "<no output devices>"
+
    def cleanup(self):
        if self.pa:
            self.pa.terminate()
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -88,6 +88,20 @@ try:
 except Exception:
    AUDIO_INPUT_DEVICE_INDEX = None

+# Output device selection (speaker).
+# If unset, PortAudio's default output device is used (if there is one).
+# Example:
+# - AUDIO_OUTPUT_DEVICE_NAME=pulse
+# - AUDIO_OUTPUT_DEVICE_INDEX=5
+AUDIO_OUTPUT_DEVICE_NAME = os.getenv("AUDIO_OUTPUT_DEVICE_NAME", "").strip() or None
+_audio_out_index_raw = os.getenv("AUDIO_OUTPUT_DEVICE_INDEX", "").strip()
+try:
+    AUDIO_OUTPUT_DEVICE_INDEX = (
+        int(_audio_out_index_raw) if _audio_out_index_raw else None
+    )
+except Exception:  # a value that cannot be parsed as an int leaves the index unset
+    AUDIO_OUTPUT_DEVICE_INDEX = None
+
 # --- Настройка времени ---
 # Устанавливаем часовой пояс на Москву, чтобы будильник работал корректно