
Commit d629a66

Example for whisper in Node.js
1 parent aaac3cc commit d629a66

File tree

4 files changed: +1237 −0 lines changed

whisper-node/README.md (+22)

@@ -0,0 +1,22 @@
# whisper-node

This project demonstrates how to use OpenAI Whisper in a Node.js environment.

## Instructions

1. Clone the repository:
   ```sh
   git clone https://github.com/huggingface/transformers.js-examples.git
   ```
2. Change directory to the `whisper-node` project:
   ```sh
   cd transformers.js-examples/whisper-node
   ```
3. Install the dependencies:
   ```sh
   npm install
   ```
4. Run the example:
   ```sh
   node index.js
   ```
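Step 4 loads the model and transcribes a sample clip; the full script appears in the next file. As a quick orientation, here is a condensed sketch of the core call it makes, with a hypothetical one-second silent buffer standing in for real audio samples:

```js
import { pipeline } from "@huggingface/transformers";

// Build the ASR pipeline; the model is downloaded on first use.
const transcriber = await pipeline(
  "automatic-speech-recognition",
  "Xenova/whisper-tiny.en",
);

// Hypothetical input: one second of silence at 16 kHz, only to show the call shape.
// index.js (below) feeds real samples decoded from a .wav file instead.
const samples = new Float32Array(16000);
const output = await transcriber(samples);
console.log(output.text);
```

Note that `index.js` uses ESM imports and top-level `await`, so it must run in a package marked `"type": "module"` (as this project presumably is) or be renamed to `index.mjs`.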

whisper-node/index.js (+41)

@@ -0,0 +1,41 @@
```js
import { pipeline } from "@huggingface/transformers";
import wavefile from "wavefile";

// Load model
const transcriber = await pipeline(
  "automatic-speech-recognition",
  "Xenova/whisper-tiny.en",
);

// Load audio data
const url =
  "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav";
const buffer = Buffer.from(await fetch(url).then((x) => x.arrayBuffer()));

// Read .wav file and convert it to required format
const wav = new wavefile.WaveFile(buffer);
wav.toBitDepth("32f"); // Pipeline expects input as a Float32Array
wav.toSampleRate(16000); // Whisper expects audio with a sampling rate of 16000
let audioData = wav.getSamples();
if (Array.isArray(audioData)) {
  if (audioData.length > 1) {
    const SCALING_FACTOR = Math.sqrt(2);

    // Merge channels (into first channel to save memory)
    for (let i = 0; i < audioData[0].length; ++i) {
      audioData[0][i] =
        (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
    }
  }

  // Select first channel
  audioData = audioData[0];
}

// Run model
const start = performance.now();
const output = await transcriber(audioData);
const end = performance.now();
console.log(`Execution duration: ${(end - start) / 1000} seconds`);
console.log(output);
// { text: ' And so my fellow Americans ask not what your country can do for you, ask what you can do for your country.' }
```
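The committed script times a single transcription of a remote clip. As a variation (not part of this commit), the sketch below reads a hypothetical local file `./audio.wav` with `node:fs` and requests chunk timestamps via `return_timestamps`, an option the transformers.js speech-recognition pipeline accepts in recent versions; treat the exact output shape as an assumption to check against your installed version's documentation.

```js
import { readFileSync } from "node:fs";
import { pipeline } from "@huggingface/transformers";
import wavefile from "wavefile";

// Same model as index.js
const transcriber = await pipeline(
  "automatic-speech-recognition",
  "Xenova/whisper-tiny.en",
);

// "./audio.wav" is a placeholder path, not a file shipped with this example.
const wav = new wavefile.WaveFile(readFileSync("./audio.wav"));
wav.toBitDepth("32f"); // convert samples to 32-bit float
wav.toSampleRate(16000); // Whisper expects 16 kHz audio
let audioData = wav.getSamples();
if (Array.isArray(audioData)) audioData = audioData[0]; // keep the first channel

// return_timestamps asks the pipeline for per-chunk timestamps (assumed option;
// verify the name and output shape for your transformers.js version).
const output = await transcriber(audioData, { return_timestamps: true });
console.log(output);
// Roughly: { text: '...', chunks: [{ timestamp: [0, 5.2], text: '...' }, ...] }
```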
