|
2 | 2 | import asyncio
|
3 | 3 | import io
|
4 | 4 | import time
|
| 5 | +import tempfile |
5 | 6 | from collections.abc import AsyncGenerator
|
6 | 7 | from math import ceil
|
7 | 8 | from typing import Final, Optional, Union, cast
|
|
24 | 25 | from vllm.utils import PlaceholderModule
|
25 | 26 |
|
26 | 27 | try:
|
| 28 | + import audioread |
27 | 29 | import librosa
|
28 | 30 | except ImportError:
|
| 31 | + audioread = PlaceholderModule("audioread") # type: ignore[assignment] |
29 | 32 | librosa = PlaceholderModule("librosa") # type: ignore[assignment]
|
30 | 33 |
|
31 | 34 | logger = init_logger(__name__)
|
@@ -201,8 +204,19 @@ async def _preprocess_transcription(
|
201 | 204 | if len(audio_data) / 1024**2 > MAX_AUDIO_CLIP_FILESIZE_MB:
|
202 | 205 | raise ValueError("Maximum file size exceeded.")
|
203 | 206 |
|
204 |
| - with io.BytesIO(audio_data) as bytes_: |
205 |
| - y, sr = librosa.load(bytes_) |
def load_audio(audio_data: bytes):
    """Decode an encoded audio clip into a waveform.

    The bytes are first decoded straight from memory. If that fails for any
    ordinary reason (typically a container the in-memory decoder cannot
    handle), they are spilled to a temporary file and decoded through
    ``audioread``, which needs a real filesystem path.

    Args:
        audio_data: Raw bytes of the encoded audio file.

    Returns:
        Whatever ``librosa.load(..., sr=None)`` returns; the caller unpacks
        it as ``(y, sr)``. ``sr=None`` asks librosa not to resample.

    Raises:
        Exception: Whatever the ``audioread``/``librosa`` fallback raises
            when the bytes cannot be decoded by either path.
    """
    try:
        with io.BytesIO(audio_data) as bytes_:
            return librosa.load(bytes_, sr=None)
    except Exception:
        # Deliberate best-effort fallback. Catch ``Exception`` rather than
        # using a bare ``except`` so KeyboardInterrupt, SystemExit and
        # asyncio.CancelledError still propagate to the caller.
        pass

    with tempfile.NamedTemporaryFile() as temp:
        temp.write(audio_data)
        # audioread reopens the file by name; without a flush the tail of
        # the data (or all of it, for small clips) may still sit in this
        # handle's write buffer and be invisible to the second reader.
        temp.flush()
        audio_read_obj = audioread.audio_open(temp.name)
        # NOTE(review): librosa is expected to close the reader it is
        # handed -- confirm against the pinned librosa version.
        return librosa.load(audio_read_obj, sr=None)
| 218 | + |
| 219 | + y, sr = load_audio(audio_data) |
206 | 220 |
|
207 | 221 | duration = librosa.get_duration(y=y, sr=sr)
|
208 | 222 | if duration > self.max_audio_clip_s:
|
|
0 commit comments