fix: select audio input device via env

2026-03-12 13:03:50 +03:00
parent 6769486e83
commit e9f26f8050
4 changed files with 128 additions and 2 deletions
--- a/app/audio/stt.py
+++ b/app/audio/stt.py
@@ -86,6 +86,7 @@ class SpeechRecognizer:
        self.stream = None
        self.transcript = ""
        self.last_successful_operation = datetime.now()
+        self._input_device_index = None

    def initialize(self):
        """Инициализация клиента Deepgram и PyAudio."""
@@ -102,7 +103,9 @@ class SpeechRecognizer:
            print(f"❌ Ошибка при создании клиента Deepgram: {e}")
            raise

-        self.pa = get_audio_manager().get_pyaudio()
+        audio_manager = get_audio_manager()
+        self.pa = audio_manager.get_pyaudio()
+        self._input_device_index = audio_manager.get_input_device_index()
        print("✅ Deepgram клиент готов")
        # Обновляем время последней успешной операции
        self.last_successful_operation = datetime.now()
@@ -128,12 +131,17 @@ class SpeechRecognizer:
    def _get_stream(self):
        """Открывает аудиопоток PyAudio, если он еще не открыт."""
        if self.stream is None:
+            kwargs = {}
+            if self._input_device_index is not None:
+                kwargs["input_device_index"] = self._input_device_index
+
            self.stream = self.pa.open(
                rate=SAMPLE_RATE,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=4096,
+                **kwargs,
            )
        return self.stream

--- a/app/audio/wakeword.py
+++ b/app/audio/wakeword.py
@@ -37,7 +37,9 @@ class WakeWordDetector:
        )

        # Используем общий экземпляр PyAudio
-        self.pa = get_audio_manager().get_pyaudio()
+        audio_manager = get_audio_manager()
+        self.pa = audio_manager.get_pyaudio()
+        self._input_device_index = audio_manager.get_input_device_index()
        self._open_stream()
        print(f"🎤 Ожидание wake word 'Alexandr' (sens={PORCUPINE_SENSITIVITY:.2f})...")

@@ -54,12 +56,17 @@ class WakeWordDetector:
                pass

        # Открываем поток с параметрами, которые требует Porcupine
+        kwargs = {}
+        if getattr(self, "_input_device_index", None) is not None:
+            kwargs["input_device_index"] = self._input_device_index
+
        self.audio_stream = self.pa.open(
            rate=self.porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=self.porcupine.frame_length,
+            **kwargs,
        )
        self._stream_closed = False

--- a/app/core/audio_manager.py
+++ b/app/core/audio_manager.py
@@ -1,6 +1,8 @@
 import pyaudio
 import threading

+from .config import AUDIO_INPUT_DEVICE_INDEX, AUDIO_INPUT_DEVICE_NAME
+

 class AudioManager:
    _instance = None
@@ -11,12 +13,109 @@ class AudioManager:
            if cls._instance is None:
                cls._instance = super(AudioManager, cls).__new__(cls)
                cls._instance.pa = pyaudio.PyAudio()
+                cls._instance._input_device_index = None
+                cls._instance._input_device_resolved = False
                print("🔊 AudioManager: PyAudio initialized (Global)")
            return cls._instance

    def get_pyaudio(self):
        return self.pa

+    def get_input_device_index(self):
+        """
+        Return the PortAudio input device index, resolving it on first use.
+        A successful result is cached; it is None only when no PyAudio instance
+        is set. A RuntimeError from resolution propagates and is not cached,
+        so the next call resolves again.
+        """
+        if not self._input_device_resolved:
+            self._input_device_index = self._resolve_input_device_index()
+            self._input_device_resolved = True
+        return self._input_device_index
+
+    def _resolve_input_device_index(self):
+        """
+        Resolve the device: env index, env name, PortAudio default, first input.
+        Returns None if PyAudio is unset; raises RuntimeError if no device qualifies.
+        """
+        if self.pa is None:
+            return None
+        total = int(self.pa.get_device_count() or 0)
+
+        def has_input(dev: int) -> bool:
+            try:
+                details = self.pa.get_device_info_by_index(dev)
+            except Exception:
+                return False  # unreadable device info counts as "no input"
+            return int(details.get("maxInputChannels") or 0) > 0
+
+        # An explicitly configured index must be usable; never fall back silently.
+        if AUDIO_INPUT_DEVICE_INDEX is not None:
+            wanted = int(AUDIO_INPUT_DEVICE_INDEX)
+            if not (0 <= wanted < total and has_input(wanted)):
+                raise RuntimeError(
+                    "Audio input initialization failed: invalid AUDIO_INPUT_DEVICE_INDEX="
+                    f"{AUDIO_INPUT_DEVICE_INDEX}. Available input devices:\n"
+                    + self.describe_input_devices()
+                )
+            return wanted
+
+        if AUDIO_INPUT_DEVICE_NAME:  # case-insensitive substring match
+            pattern = AUDIO_INPUT_DEVICE_NAME.lower()
+            for dev in range(total):
+                if not has_input(dev):
+                    continue
+                try:
+                    label = str(self.pa.get_device_info_by_index(dev).get("name") or "")
+                except Exception:
+                    continue
+                if pattern in label.lower():
+                    return dev
+            raise RuntimeError(
+                "Audio input initialization failed: could not find an input device "
+                f"matching AUDIO_INPUT_DEVICE_NAME={AUDIO_INPUT_DEVICE_NAME!r}. "
+                "Available input devices:\n"
+                + self.describe_input_devices()
+            )
+
+        # Nothing configured: prefer PortAudio's default input (best effort).
+        try:
+            candidate = int(self.pa.get_default_input_device_info().get("index"))
+            if 0 <= candidate < total and has_input(candidate):
+                return candidate
+        except Exception:
+            pass
+
+        first = next((dev for dev in range(total) if has_input(dev)), None)
+        if first is not None:  # last resort: first device with input channels
+            return first
+        raise RuntimeError(
+            "Audio input initialization failed: no input devices found. "
+            "Check microphone connection and PipeWire/PulseAudio. "
+            "PortAudio devices:\n"
+            + self.describe_input_devices()
+        )
+
+    def describe_input_devices(self, limit: int = 20) -> str:
+        """Return one "[idx] name (in=N)" line per input device, stopping at *limit* lines."""
+        if self.pa is None:
+            return "<PyAudio not initialized>"
+
+        lines = []
+        for dev in range(int(self.pa.get_device_count() or 0)):
+            try:
+                details = self.pa.get_device_info_by_index(dev)
+            except Exception:
+                continue  # devices whose info cannot be read are left out
+            channels = int(details.get("maxInputChannels") or 0)
+            if channels > 0:
+                label = str(details.get("name") or "").strip()
+                lines.append(f"[{dev}] {label} (in={channels})")
+                if len(lines) >= limit:
+                    break
+
+        return "<no input devices>" if not lines else "\n".join(lines)
+
    def cleanup(self):
        if self.pa:
            self.pa.terminate()
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -76,6 +76,18 @@ PORCUPINE_SENSITIVITY = float(os.getenv("PORCUPINE_SENSITIVITY", "0.8"))
 SAMPLE_RATE = 16000
 CHANNELS = 1

+# Input device (microphone) selection via environment variables.
+# If neither variable is set, PortAudio's default input device is used (if any).
+# Examples: AUDIO_INPUT_DEVICE_NAME=pulse, AUDIO_INPUT_DEVICE_INDEX=2
+# A non-integer AUDIO_INPUT_DEVICE_INDEX is reported and then ignored.
+AUDIO_INPUT_DEVICE_NAME = os.getenv("AUDIO_INPUT_DEVICE_NAME", "").strip() or None
+_audio_index_raw = os.getenv("AUDIO_INPUT_DEVICE_INDEX", "").strip()
+try:
+    AUDIO_INPUT_DEVICE_INDEX = int(_audio_index_raw) if _audio_index_raw else None
+except ValueError:
+    print(f"⚠️ Ignoring invalid AUDIO_INPUT_DEVICE_INDEX={_audio_index_raw!r}: expected an integer")
+    AUDIO_INPUT_DEVICE_INDEX = None
+
 # --- Настройка времени ---
 # Устанавливаем часовой пояс на Москву, чтобы будильник работал корректно