feat: harden audio device compatibility across machines
This commit is contained in:
@@ -9,6 +9,7 @@ Listens for the "Alexandr" wake word.
|
||||
import pvporcupine
|
||||
import pyaudio
|
||||
import struct
|
||||
import numpy as np
|
||||
from ..core.config import (
|
||||
PORCUPINE_ACCESS_KEY,
|
||||
PORCUPINE_KEYWORD_PATH,
|
||||
@@ -24,6 +25,11 @@ class WakeWordDetector:
|
||||
self.porcupine = None
|
||||
self.audio_stream = None
|
||||
self.pa = None
|
||||
self._audio_manager = None
|
||||
self._input_device_index = None
|
||||
self._capture_sample_rate = None
|
||||
self._capture_frame_length = None
|
||||
self._resampled_pcm_buffer = np.array([], dtype=np.int16)
|
||||
self._stream_closed = True # Флаг состояния потока (закрыт/открыт)
|
||||
self._last_hit_ts = 0.0
|
||||
|
||||
@@ -37,11 +43,13 @@ class WakeWordDetector:
|
||||
)
|
||||
|
||||
# Используем общий экземпляр PyAudio
|
||||
audio_manager = get_audio_manager()
|
||||
self.pa = audio_manager.get_pyaudio()
|
||||
self._input_device_index = audio_manager.get_input_device_index()
|
||||
self._audio_manager = get_audio_manager()
|
||||
self.pa = self._audio_manager.get_pyaudio()
|
||||
self._open_stream()
|
||||
print(f"🎤 Ожидание wake word 'Alexandr' (sens={PORCUPINE_SENSITIVITY:.2f})...")
|
||||
print(
|
||||
"🎤 Ожидание wake word 'Alexandr' "
|
||||
f"(sens={PORCUPINE_SENSITIVITY:.2f}, mic_rate={self._capture_sample_rate})..."
|
||||
)
|
||||
|
||||
def _open_stream(self):
|
||||
"""Открытие аудиопотока с микрофона."""
|
||||
@@ -55,19 +63,28 @@ class WakeWordDetector:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Открываем поток с параметрами, которые требует Porcupine
|
||||
kwargs = {}
|
||||
if getattr(self, "_input_device_index", None) is not None:
|
||||
kwargs["input_device_index"] = self._input_device_index
|
||||
|
||||
self.audio_stream = self.pa.open(
|
||||
rate=self.porcupine.sample_rate,
|
||||
target_rate = int(self.porcupine.sample_rate)
|
||||
fallback_rates = [48000, 44100, 32000, 22050, 16000]
|
||||
self.audio_stream, self._input_device_index, actual_rate = self._audio_manager.open_input_stream(
|
||||
rate=target_rate,
|
||||
channels=1,
|
||||
format=pyaudio.paInt16,
|
||||
input=True,
|
||||
frames_per_buffer=self.porcupine.frame_length,
|
||||
**kwargs,
|
||||
preferred_index=self._input_device_index,
|
||||
fallback_rates=fallback_rates,
|
||||
)
|
||||
self._capture_sample_rate = int(actual_rate)
|
||||
self._capture_frame_length = max(
|
||||
64,
|
||||
int(
|
||||
round(
|
||||
self.porcupine.frame_length
|
||||
* self._capture_sample_rate
|
||||
/ target_rate
|
||||
)
|
||||
),
|
||||
)
|
||||
self._resampled_pcm_buffer = np.array([], dtype=np.int16)
|
||||
self._stream_closed = False
|
||||
|
||||
def stop_monitoring(self):
|
||||
@@ -80,6 +97,40 @@ class WakeWordDetector:
|
||||
pass
|
||||
self._stream_closed = True
|
||||
|
||||
def _resample_to_target_rate(self, pcm: np.ndarray) -> np.ndarray:
|
||||
target_rate = int(self.porcupine.sample_rate)
|
||||
source_rate = int(self._capture_sample_rate or target_rate)
|
||||
if source_rate == target_rate:
|
||||
return pcm
|
||||
if pcm.size == 0:
|
||||
return np.array([], dtype=np.int16)
|
||||
target_length = max(1, int(round(pcm.size * target_rate / source_rate)))
|
||||
x_old = np.arange(pcm.size, dtype=np.float32)
|
||||
x_new = np.linspace(0.0, float(max(0, pcm.size - 1)), target_length)
|
||||
resampled = np.interp(x_new, x_old, pcm.astype(np.float32))
|
||||
return np.asarray(resampled, dtype=np.int16)
|
||||
|
||||
def _read_porcupine_frame(self):
|
||||
target_length = int(self.porcupine.frame_length)
|
||||
if self._capture_sample_rate == self.porcupine.sample_rate:
|
||||
pcm = self.audio_stream.read(target_length, exception_on_overflow=False)
|
||||
return np.asarray(struct.unpack_from("h" * target_length, pcm), dtype=np.int16)
|
||||
|
||||
while self._resampled_pcm_buffer.size < target_length:
|
||||
raw = self.audio_stream.read(
|
||||
self._capture_frame_length, exception_on_overflow=False
|
||||
)
|
||||
captured = np.frombuffer(raw, dtype=np.int16)
|
||||
converted = self._resample_to_target_rate(captured)
|
||||
if converted.size:
|
||||
self._resampled_pcm_buffer = np.concatenate(
|
||||
(self._resampled_pcm_buffer, converted)
|
||||
)
|
||||
|
||||
frame = self._resampled_pcm_buffer[:target_length]
|
||||
self._resampled_pcm_buffer = self._resampled_pcm_buffer[target_length:]
|
||||
return frame
|
||||
|
||||
def wait_for_wakeword(self, timeout: float = None) -> bool:
|
||||
"""
|
||||
Блокирующая функция: ждет, пока не будет услышана фраза "Alexandr"
|
||||
@@ -107,14 +158,10 @@ class WakeWordDetector:
|
||||
return False
|
||||
|
||||
# Читаем небольшой кусочек аудио (frame)
|
||||
pcm = self.audio_stream.read(
|
||||
self.porcupine.frame_length, exception_on_overflow=False
|
||||
)
|
||||
# Конвертируем байты в кортеж чисел (требование Porcupine)
|
||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||
pcm = self._read_porcupine_frame()
|
||||
|
||||
# Обрабатываем фрейм через Porcupine
|
||||
keyword_index = self.porcupine.process(pcm)
|
||||
keyword_index = self.porcupine.process(pcm.tolist())
|
||||
|
||||
# Если keyword_index >= 0, значит ключевое слово обнаружено
|
||||
if keyword_index >= 0:
|
||||
@@ -140,12 +187,9 @@ class WakeWordDetector:
|
||||
try:
|
||||
self._open_stream()
|
||||
|
||||
pcm = self.audio_stream.read(
|
||||
self.porcupine.frame_length, exception_on_overflow=False
|
||||
)
|
||||
pcm = struct.unpack_from("h" * self.porcupine.frame_length, pcm)
|
||||
pcm = self._read_porcupine_frame()
|
||||
|
||||
keyword_index = self.porcupine.process(pcm)
|
||||
keyword_index = self.porcupine.process(pcm.tolist())
|
||||
if keyword_index >= 0:
|
||||
now = time.time()
|
||||
if now - self._last_hit_ts < 0.2: # Уменьшаем интервал для более быстрой реакции
|
||||
|
||||
Reference in New Issue
Block a user