Skip to content

Commit c667ae0

Browse files
committed
fix: support webm with audioread fallback
Signed-off-by: cpwan <[email protected]>
1 parent 907f935 commit c667ae0

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

vllm/entrypoints/openai/serving_transcription.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import asyncio
33
import io
44
import time
5+
import tempfile
56
from collections.abc import AsyncGenerator
67
from math import ceil
78
from typing import Final, Optional, Union, cast
@@ -24,8 +25,10 @@
2425
from vllm.utils import PlaceholderModule
2526

2627
try:
28+
import audioread
2729
import librosa
2830
except ImportError:
31+
audioread = PlaceholderModule("audioread") # type: ignore[assignment]
2932
librosa = PlaceholderModule("librosa") # type: ignore[assignment]
3033

3134
logger = init_logger(__name__)
@@ -201,8 +204,19 @@ async def _preprocess_transcription(
201204
if len(audio_data) / 1024**2 > MAX_AUDIO_CLIP_FILESIZE_MB:
202205
raise ValueError("Maximum file size exceeded.")
203206

204-
with io.BytesIO(audio_data) as bytes_:
205-
y, sr = librosa.load(bytes_)
207+
def load_audio(audio_data: bytes):
    """Decode encoded audio bytes at the file's native sample rate.

    The bytes are first handed to ``librosa.load`` as an in-memory stream.
    If that attempt raises (e.g. for WebM input, which is what this
    fallback was introduced for), the bytes are written to a temporary
    file and decoded through ``audioread`` instead.

    Args:
        audio_data: Complete contents of the uploaded audio file.

    Returns:
        The result of ``librosa.load(..., sr=None)``; the caller unpacks
        it as ``(y, sr)``.

    Raises:
        Exception: Whatever the audioread fallback raises when it cannot
            decode the data either.
    """
    try:
        with io.BytesIO(audio_data) as bytes_:
            return librosa.load(bytes_, sr=None)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed and retried.
        with tempfile.NamedTemporaryFile() as temp:
            temp.write(audio_data)
            # audioread reopens the file by name, so the buffered bytes
            # must reach the file before the decoder reads it; without
            # the flush it may see an empty or truncated file.
            temp.flush()
            audio_read_obj = audioread.audio_open(temp.name)
            return librosa.load(audio_read_obj, sr=None)
218+
219+
y, sr = load_audio(audio_data)
206220

207221
duration = librosa.get_duration(y=y, sr=sr)
208222
if duration > self.max_audio_clip_s:

0 commit comments

Comments
 (0)