Skip to content

Commit a48830d

Browse files
committed
llama.vim : fix large chunk accept + comments [no ci]
1 parent 4a81890 commit a48830d

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

examples/llama.vim

+11-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"
1818
" start the llama.cpp server with a FIM-compatible model. for example:
1919
"
20-
" $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 64
20+
" $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 256
2121
"
2222
" --batch-size [512, model max context]
2323
"
@@ -29,6 +29,12 @@
2929
" chunks the batch into smaller chunks for faster processing
3030
" depends on the specific hardware. use llama-bench to profile and determine the best size
3131
"
32+
" --cache-reuse (g:llama_config.n_predict, 1024]
33+
"
34+
" this should be either 0 (disabled) or strictly larger than g:llama_config.n_predict
35+
" using a non-zero value enables context reuse on the server side, which dramatically improves performance at
36+
" large contexts. a value of 256 should be good for all cases
37+
"
3238
" run this once to initialise llama.vim:
3339
"
3440
" :call llama#init()
@@ -43,8 +49,8 @@ highlight llama_hl_info guifg=#77ff2f
4349
" general parameters:
4450
"
4551
" endpoint: llama.cpp server endpoint
46-
" n_prefix: number of lines before the cursor location to include in the prefix
47-
" n_suffix: number of lines after the cursor location to include in the suffix
52+
" n_prefix: number of lines before the cursor location to include in the local prefix
53+
" n_suffix: number of lines after the cursor location to include in the local suffix
4854
" n_predict: max number of tokens to predict
4955
" t_max_prompt_ms: max allotted time for the prompt processing (TODO: not yet supported)
5056
" t_max_predict_ms: max allotted time for the prediction
@@ -72,7 +78,7 @@ highlight llama_hl_info guifg=#77ff2f
7278
let s:default_config = {
7379
\ 'endpoint': 'http://127.0.0.1:8012/infill',
7480
\ 'n_prefix': 256,
75-
\ 'n_suffix': 8,
81+
\ 'n_suffix': 64,
7682
\ 'n_predict': 128,
7783
\ 't_max_prompt_ms': 500,
7884
\ 't_max_predict_ms': 1000,
@@ -463,7 +469,7 @@ function! llama#fim_accept(first_line)
463469

464470
" move the cursor to the end of the accepted text
465471
if !a:first_line && len(s:content) > 1
466-
call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
472+
call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx + 1)
467473
else
468474
call cursor(s:pos_y, s:pos_x + len(s:content[0]))
469475
endif

0 commit comments

Comments
 (0)