
Commit acf6d19

llama.vim : final touches
ggml-ci
1 parent 3969aa3 commit acf6d19

File tree

2 files changed, +28 -9 lines changed

examples/llama.vim (+27 -8)

@@ -17,7 +17,7 @@
 "
 " start the llama.cpp server with a FIM-compatible model. for example:
 "
-"   $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 512
+"   $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 64
 "
 "   --batch-size [512, model max context]
 "
@@ -33,8 +33,10 @@
 "
 "   :call llama#init()
 "
+" more info: https://github.com/ggerganov/llama.cpp/pull/9787/files
+"
 
-" color of the suggested text
+" colors (adjust to your liking)
 highlight llama_hl_hint guifg=#ff772f
 highlight llama_hl_info guifg=#77ff2f
 
@@ -154,6 +156,8 @@ function! llama#init()
     endif
 endfunction
 
+" compute how similar two chunks of text are
+" 0 - no similarity, 1 - high similarity
 " TODO: figure out something better
 function! s:chunk_sim(c0, c1)
     let l:lines0 = len(a:c0)
@@ -173,17 +177,23 @@ function! s:chunk_sim(c0, c1)
     return 2.0 * l:common / (l:lines0 + l:lines1)
 endfunction
 
-" pick a chunk from the provided text and queue it for processing
+" pick a random chunk of size g:llama_config.ring_chunk_size from the provided text and queue it for processing
+"
+" no_mod   - do not pick chunks from buffers with pending changes
+" do_evict - evict chunks that are very similar to the new one
+"
 function! s:pick_chunk(text, no_mod, do_evict)
     " do not pick chunks from buffers with pending changes or buffers that are not files
     if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
         return
     endif
 
+    " if the extra context option is disabled - do nothing
     if g:llama_config.ring_n_chunks <= 0
         return
     endif
 
+    " don't pick very small chunks
     if len(a:text) < 3
         return
     endif
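
The new header comment documents s:chunk_sim as a score in [0, 1], and the visible return statement computes 2.0 * l:common / (l:lines0 + l:lines1). Below is a minimal standalone sketch consistent with those two pieces; the counting loop in the middle of the function is not part of this diff, so the matching rule used here (count the lines of c0 that also occur in c1) is an assumption:

    " sketch only - the counting loop is assumed, not taken from this diff
    function! ChunkSimSketch(c0, c1)
        let l:lines0 = len(a:c0)
        let l:lines1 = len(a:c1)

        " count the lines of c0 that also appear somewhere in c1
        let l:common = 0
        for l:line0 in a:c0
            if index(a:c1, l:line0) >= 0
                let l:common += 1
            endif
        endfor

        " Dice-style coefficient: 0 - no shared lines, 1 - identical line sets
        return 2.0 * l:common / (l:lines0 + l:lines1)
    endfunction
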
@@ -220,9 +230,9 @@ function! s:pick_chunk(text, no_mod, do_evict)
         return
     endif
 
-    " evict chunks that are very similar to the new one
+    " evict queued chunks that are very similar to the new one
     for i in range(len(s:ring_queued) - 1, 0, -1)
-        if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.5
+        if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.9
             if a:do_evict
                 call remove(s:ring_queued, i)
                 let s:ring_n_evict += 1
@@ -234,7 +244,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
 
     " also from s:ring_chunks
     for i in range(len(s:ring_chunks) - 1, 0, -1)
-        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.5
+        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
             if a:do_evict
                 call remove(s:ring_chunks, i)
                 let s:ring_n_evict += 1
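
Raising the eviction threshold from 0.5 to 0.9 in both loops means only near-duplicates of the new chunk are evicted now; moderately similar chunks survive in the ring. Note also that both loops walk their lists backwards while removing elements, which keeps the indices of the not-yet-visited entries valid. A standalone illustration of that removal pattern, with hypothetical data:

    let s:queued = [10, 11, 12, 13, 14]
    for i in range(len(s:queued) - 1, 0, -1)
        " removing s:queued[i] only shifts elements already visited
        if s:queued[i] % 2 == 0
            call remove(s:queued, i)
        endif
    endfor

    " after the loop: s:queued == [11, 13]
    echo s:queued
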
@@ -244,6 +254,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
         endif
     endfor
 
+    " TODO: become parameter ?
     if len(s:ring_queued) == 16
         call remove(s:ring_queued, 0)
     endif
@@ -253,7 +264,8 @@ function! s:pick_chunk(text, no_mod, do_evict)
     "let &statusline = 'extra context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued)
 endfunction
 
-" called every g:llama_config.ring_update_ms, processed chunks are moved to s:ring_chunks
+" picks a queued chunk, sends it for processing and adds it to s:ring_chunks
+" called every g:llama_config.ring_update_ms
 function! s:ring_update()
     call timer_start(g:llama_config.ring_update_ms, {-> s:ring_update()})
 
@@ -306,15 +318,21 @@ function! s:ring_update()
         \ g:llama_config.endpoint, shellescape(l:request)
         \ )
 
+    " no callbacks because we don't need to process the response
     call jobstart(l:curl_command, {})
 endfunction
 
+" necessary for 'inoremap <expr>'
 function! llama#fim_inline(is_auto, on_hold) abort
     call llama#fim(a:is_auto, a:on_hold)
     return ''
 endfunction
 
+" the main FIM call
+" takes local context around the cursor and sends it together with the extra context
+" to the llama.cpp server for completion
 function! llama#fim(is_auto, on_hold) abort
+    " we already have a suggestion for the current cursor position
     if a:on_hold && (s:hint_shown || (s:pos_x == col('.') - 1 && s:pos_y == line('.')))
         return
     endif
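
The jobstart call above passes an empty options dict, so no on_stdout/on_exit callbacks are registered and the server's response is simply discarded. A minimal sketch of the same fire-and-forget shape, assuming a local server on the port from the header comment; the endpoint path and payload here are illustrative, not taken from the diff:

    " minimal fire-and-forget request (script-level sketch)
    let s:request = '{"input_prefix": "", "input_suffix": ""}'
    let s:cmd     = printf('curl -s -X POST http://127.0.0.1:8012/infill -d %s', shellescape(s:request))

    " empty options dict: no callbacks, the response is never read
    call jobstart(s:cmd, {})
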
@@ -415,6 +433,7 @@ function! llama#fim(is_auto, on_hold) abort
     " TODO: per-file location
     let l:delta_y = abs(s:pos_y - s:pos_y_pick)
 
+    " gather some extra context nearby and process it in the background
     " only gather chunks if the cursor has moved a lot
     " TODO: something more clever? reranking?
     if a:is_auto && l:delta_y > 32
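
The l:delta_y check gathers fresh extra context only after the cursor has jumped more than 32 lines from the last pick position, which keeps background chunk picking cheap during ordinary editing. A standalone sketch of that distance gate; the names are hypothetical:

    let s:last_pick_y = 10

    function! ShouldGather(cursor_y)
        " only re-gather after a large vertical jump
        return abs(a:cursor_y - s:last_pick_y) > 32
    endfunction

    " moved 10 lines -> 0 (keep current context)
    echo ShouldGather(20)
    " moved 90 lines -> 1 (gather new chunks)
    echo ShouldGather(100)
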
@@ -474,7 +493,7 @@ function! s:on_move()
     call llama#fim_cancel()
 endfunction
 
-" callback that processes the result from the server
+" callback that processes the FIM result from the server and displays the suggestion
 function! s:fim_on_stdout(job_id, data, event) dict
     let l:raw = join(a:data, "\n")
     if len(l:raw) == 0

src/llama-sampling.cpp (+1 -1)

@@ -1741,7 +1741,7 @@ struct llama_sampler * llama_sampler_init_logit_bias(
 
 // infill
 
-#define GGML_DEBUG_SAMPLER_INFILL
+//#define GGML_DEBUG_SAMPLER_INFILL
 
 struct llama_sampler_infill {
     const struct llama_vocab * vocab;
