feat: harden audio device compatibility across machines

2026-03-12 14:08:20 +03:00
parent e9f26f8050
commit 6c2702d5e3
7 changed files with 480 additions and 74 deletions
--- a/app/audio/tts.py
+++ b/app/audio/tts.py
@@ -14,9 +14,11 @@ import time
 import warnings

 import numpy as np
+import pyaudio
 import sounddevice as sd
 import torch

+from ..core.audio_manager import get_audio_manager
 from ..core.config import TTS_EN_SPEAKER, TTS_SAMPLE_RATE, TTS_SPEAKER

 # Подавляем предупреждения Silero о длинном тексте (мы сами его режем)
@@ -36,6 +38,8 @@ class TextToSpeech:
        self.speaker_en = TTS_EN_SPEAKER
        self._interrupted = False
        self._stop_flag = threading.Event()
+        self._audio_manager = None
+        self._output_device_index = None

    def _load_model(self, language: str):
        """
@@ -232,14 +236,13 @@ class TextToSpeech:
                audio_np = audio.numpy()

                if check_interrupt:
-                    # Воспроизведение с проверкой прерывания (сложная логика)
-                    if not self._play_with_interrupt(audio_np, check_interrupt):
+                    if not self._play_audio_with_interrupt(audio_np, check_interrupt):
                        success = False
                        break
                else:
-                    # Обычное воспроизведение (блокирующее)
-                    sd.play(audio_np, self.sample_rate)
-                    sd.wait()
+                    if not self._play_audio_blocking(audio_np):
+                        success = False
+                        break

            except Exception as e:
                print(f"❌ Ошибка TTS (часть {i + 1}/{total_chunks}): {e}")
@@ -293,6 +296,75 @@ class TextToSpeech:
            text, check_interrupt=check_interrupt, language=language
        )

+    def _resample_audio(self, audio_np: np.ndarray, src_rate: int, dst_rate: int):
+        """
+        Resample mono audio from ``src_rate`` to ``dst_rate`` (linear interpolation).
+
+        No low-pass filter is applied, so downsampling may alias.
+
+        Args:
+            audio_np: Mono samples; converted to float32.
+            src_rate: Sample rate of ``audio_np``.
+            dst_rate: Sample rate to convert to.
+
+        Returns:
+            ``audio_np`` as float32 with its shape unchanged when the rates match,
+            an empty float32 array for empty input, otherwise a 1-D float32 array
+            of ``round(size * dst_rate / src_rate)`` samples (never fewer than one).
+        """
+        if src_rate == dst_rate:
+            return audio_np.astype(np.float32, copy=False)
+        if audio_np.size == 0:
+            return np.asarray([], dtype=np.float32)
+
+        # np.interp only accepts 1-D data, so column-shaped mono input such as
+        # (n, 1) is flattened instead of raising.
+        samples = audio_np.astype(np.float32, copy=False).reshape(-1)
+        target_length = max(1, int(round(samples.size * dst_rate / src_rate)))
+
+        # Sample positions are kept in float64: float32 cannot represent every
+        # integer above 2**24, which would corrupt the grid for long buffers.
+        x_old = np.arange(samples.size, dtype=np.float64)
+        x_new = np.linspace(0.0, float(samples.size - 1), target_length)
+        return np.interp(x_new, x_old, samples).astype(np.float32)
+
+    def _play_audio_blocking(self, audio_np: np.ndarray) -> bool:
+        """
+        Play audio to completion, preferring sounddevice and falling back to PyAudio.
+
+        Args:
+            audio_np: Samples to play at ``self.sample_rate``.
+
+        Returns:
+            True when sounddevice played the audio; otherwise whatever the
+            PyAudio fallback reports.
+        """
+        try:
+            sd.play(audio_np, self.sample_rate)
+            sd.wait()
+        except Exception as exc:
+            # Any sounddevice failure hands the whole buffer to the PyAudio
+            # path, without an interrupt callback.
+            print(f"⚠️ sounddevice playback failed, fallback to PyAudio: {exc}")
+            return self._play_with_pyaudio(audio_np, check_interrupt=None)
+        return True
+
+    def _play_audio_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
+        """
+        Play interruptible audio via sounddevice, falling back to PyAudio on error.
+
+        Args:
+            audio_np: Samples to play.
+            check_interrupt: Callback forwarded unchanged to whichever backend
+                ends up playing the audio.
+
+        Returns:
+            The result of the sounddevice path, or of the PyAudio fallback when
+            the sounddevice path raised.
+        """
+        try:
+            finished = self._play_with_interrupt_sounddevice(
+                audio_np, check_interrupt
+            )
+        except Exception as exc:
+            message = (
+                "⚠️ sounddevice playback-with-interrupt failed, fallback to PyAudio: "
+                f"{exc}"
+            )
+            print(message)
+            return self._play_with_pyaudio(audio_np, check_interrupt=check_interrupt)
+        return finished
+
+    def _play_with_pyaudio(self, audio_np: np.ndarray, check_interrupt=None) -> bool:
+        """
+        Play audio through a PyAudio output stream obtained from the audio manager.
+
+        The audio is resampled to the rate the opened stream reports and written
+        in chunks of about 30 ms (at least 256 samples); ``check_interrupt`` is
+        polled before each chunk.
+
+        Args:
+            audio_np: Samples at ``self.sample_rate``.
+            check_interrupt: Optional zero-argument callable; a truthy result
+                stops playback and sets ``self._interrupted``.
+
+        Returns:
+            True when every chunk was written, False when playback was
+            interrupted or any step raised.
+        """
+        if self._audio_manager is None:
+            # The manager is resolved on first use only.
+            self._audio_manager = get_audio_manager()
+
+        output_stream = None
+        try:
+            opened = self._audio_manager.open_output_stream(
+                rate=self.sample_rate,
+                channels=1,
+                format=pyaudio.paFloat32,
+                preferred_index=self._output_device_index,
+                fallback_rates=[48000, 44100, 32000, 22050],
+            )
+            # The device index that was actually used is remembered and passed
+            # as the preferred index on the next call.
+            output_stream, self._output_device_index, out_rate = opened
+
+            pcm = self._resample_audio(audio_np, self.sample_rate, out_rate)
+            chunk_size = max(256, int(out_rate * 0.03))
+            total_samples = len(pcm)
+
+            position = 0
+            while position < total_samples:
+                if check_interrupt and check_interrupt():
+                    self._interrupted = True
+                    return False
+                output_stream.write(pcm[position : position + chunk_size].tobytes())
+                position += chunk_size
+            return True
+        except Exception as exc:
+            print(f"❌ PyAudio playback failed: {exc}")
+            return False
+        finally:
+            if output_stream is not None:
+                # Best-effort cleanup: a failure to stop must not prevent the
+                # close, and neither may mask the result being returned.
+                for method_name in ("stop_stream", "close"):
+                    try:
+                        getattr(output_stream, method_name)()
+                    except Exception:
+                        pass
+
    def _check_interrupt_worker(self, check_interrupt):
        """
        Фоновая функция для потока: постоянно опрашивает check_interrupt.
@@ -308,7 +380,9 @@ class TextToSpeech:
            except Exception:
                pass

-    def _play_with_interrupt(self, audio_np: np.ndarray, check_interrupt) -> bool:
+    def _play_with_interrupt_sounddevice(
+        self, audio_np: np.ndarray, check_interrupt
+    ) -> bool:
        """
        Воспроизводит аудио, параллельно проверяя условие прерывания в отдельном потоке.
        """