Commit eff3570

server : add a REST Whisper server example with OAI-like API (ggml-org#1380)

felrock and ggerganov authored

* Add first draft of server
* Added json support and base funcs for server.cpp
* Add more user input via api-request, also some clean up
* Add request params and load post function, also some general clean up
* Remove unused function
* Add readme
* Add exception handlers
* Update examples/server/server.cpp
* make : add server target
* Add magic curl syntax

Co-authored-by: Georgi Gerganov <[email protected]>

1 parent fa19bc4 · commit eff3570

File tree: 9 files changed, +34631 −4 lines

.gitignore

Lines changed: 1 addition & 0 deletions

```diff
@@ -31,6 +31,7 @@ build-sanitize-thread/
 /talk-llama
 /bench
 /quantize
+/server
 /lsp

 arm_neon.h
```

Makefile

Lines changed: 5 additions & 2 deletions

```diff
@@ -1,4 +1,4 @@
-default: main bench quantize
+default: main bench quantize server

 ifndef UNAME_S
 	UNAME_S := $(shell uname -s)
@@ -338,7 +338,7 @@ libwhisper.so: $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)

 clean:
-	rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
+	rm -f *.o main stream command talk talk-llama bench quantize server lsp libwhisper.a libwhisper.so

 #
 # Examples
@@ -359,6 +359,9 @@ bench: examples/bench/bench.cpp $(WHISPER_OBJ)
 quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
 	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)

+server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
+	$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
+
 stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
```

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -65,6 +65,7 @@ elseif(CMAKE_JS_VERSION)
 else()
     add_subdirectory(main)
     add_subdirectory(stream)
+    add_subdirectory(server)
     add_subdirectory(command)
     add_subdirectory(bench)
     add_subdirectory(quantize)
```

examples/main/main.cpp

Lines changed: 2 additions & 2 deletions (whitespace-only realignment; exact column spacing not preserved here)

```diff
@@ -165,8 +165,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     else if (arg == "-m"    || arg == "--model")       { params.model = argv[++i]; }
     else if (arg == "-f"    || arg == "--file")        { params.fname_inp.emplace_back(argv[++i]); }
     else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
-    else if (arg == "-ls"   || arg == "--log-score")   { params.log_score = true; }
-    else if (arg == "-ng"   || arg == "--no-gpu")      { params.use_gpu = false; }
+    else if (arg == "-ls"   || arg == "--log-score")   { params.log_score = true; }
+    else if (arg == "-ng"   || arg == "--no-gpu")      { params.use_gpu = false; }
     else {
         fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
         whisper_print_usage(argc, argv, params);
```

examples/server/CMakeLists.txt

Lines changed: 6 additions & 0 deletions (new file)

```cmake
set(TARGET server)
add_executable(${TARGET} server.cpp httplib.h json.hpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
```
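
The CMake target builds the server from a single server.cpp on top of the vendored httplib.h and json.hpp headers (which account for most of the +34631 added lines; server.cpp itself is not reproduced in this view). As orientation only — a hypothetical sketch, not the actual server.cpp — an httplib-based handler for the `/inference` endpoint described in the README below could be wired up like this:

```cpp
// Hypothetical sketch of an httplib.h + json.hpp endpoint -- NOT the actual
// server.cpp from this commit, which is not shown in this diff view.
#include "httplib.h"
#include "json.hpp"

int main() {
    httplib::Server svr;

    // accept multipart/form-data POSTs, as in the curl /inference example below
    svr.Post("/inference", [](const httplib::Request & req, httplib::Response & res) {
        if (!req.has_file("file")) {
            res.status = 400;
            res.set_content("{\"error\":\"no 'file' field\"}", "application/json");
            return;
        }
        const auto file = req.get_file_value("file"); // raw WAV bytes in file.content

        // ... decode the WAV and run whisper inference here ...

        nlohmann::json result;
        result["text"] = "(transcription would go here)";
        res.set_content(result.dump(), "application/json");
    });

    // README defaults: --host 127.0.0.1 --port 8080
    svr.listen("127.0.0.1", 8080);
}
```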

examples/server/README.md

Lines changed: 59 additions & 0 deletions (new file)

````markdown
# whisper.cpp http server

A simple HTTP server. WAV files are passed to the inference model via HTTP requests.

```
./server -h

usage: ./bin/server [options]

options:
  -h,        --help              [default] show this help message and exit
  -t N,      --threads N         [4      ] number of threads to use during computation
  -p N,      --processors N      [1      ] number of processors to use during computation
  -ot N,     --offset-t N        [0      ] time offset in milliseconds
  -on N,     --offset-n N        [0      ] segment index offset
  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
  -bo N,     --best-of N         [2      ] number of best candidates to keep
  -bs N,     --beam-size N       [-1     ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
  -debug,    --debug-mode        [false  ] enable debug mode (eg. dump log_mel)
  -tr,       --translate         [false  ] translate from source language to english
  -di,       --diarize           [false  ] stereo audio diarization
  -tdrz,     --tinydiarize       [false  ] enable tinydiarize (requires a tdrz model)
  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
  -pp,       --print-progress    [false  ] print progress
  -nt,       --no-timestamps     [false  ] do not print timestamps
  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
  -dl,       --detect-language   [false  ] exit after automatically detecting language
             --prompt PROMPT     [       ] initial prompt
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
  --host HOST,                   [127.0.0.1] hostname/IP address for the server
  --port PORT,                   [8080   ] port number for the server
```

## request examples

**/inference**
```
curl 127.0.0.1:8080/inference \
  -H "Content-Type: multipart/form-data" \
  -F file="@<file-path>" \
  -F temperature="0.2" \
  -F response-format="json"
```

**/load**
```
curl 127.0.0.1:8080/load \
  -H "Content-Type: multipart/form-data" \
  -F model="<path-to-model-file>"
```
````
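
Beyond curl, the endpoints can be exercised programmatically. Below is a minimal client sketch using the same vendored httplib.h header, equivalent to the curl `/inference` example in the README; it assumes the server is running on the default host/port and that a WAV file exists at samples/jfk.wav (a sample path from the whisper.cpp repo, used here purely as an illustration):

```cpp
// Minimal client sketch for POST /inference using the vendored httplib.h.
// Assumes: server on 127.0.0.1:8080, input file at samples/jfk.wav.
#include "httplib.h"

#include <fstream>
#include <iostream>
#include <sstream>

int main() {
    // read the WAV file into memory
    std::ifstream f("samples/jfk.wav", std::ios::binary);
    std::stringstream ss;
    ss << f.rdbuf();

    httplib::Client cli("127.0.0.1", 8080);

    // same form fields as the curl example: file, temperature, response-format
    httplib::MultipartFormDataItems items = {
        { "file",            ss.str(), "jfk.wav", "audio/wav" },
        { "temperature",     "0.2",    "",        ""          },
        { "response-format", "json",   "",        ""          },
    };

    auto res = cli.Post("/inference", items);
    if (res && res->status == 200) {
        std::cout << res->body << std::endl; // JSON body, per response-format
    } else {
        std::cerr << "request failed" << std::endl;
        return 1;
    }
}
```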
