"
" start the llama.cpp server with a FIM-compatible model. for example:
"
- "   $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 512
+ "   $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -dt 0.1 --ubatch-size 512 --batch-size 1024 --cache-reuse 64
"
"   --batch-size [512, model max context]
"
"
"   :call llama#init()
"
+ " more info: https://github.com/ggerganov/llama.cpp/pull/9787/files
+ "

- " color of the suggested text
+ " colors (adjust to your liking)
highlight llama_hl_hint guifg=#ff772f
highlight llama_hl_info guifg=#77ff2f
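For reference, a minimal configuration sketch that pairs with the server command above. The option names ring_n_chunks, ring_chunk_size and ring_update_ms all appear in the hunks below; the endpoint value, including its /infill path, is an assumption and is not shown in this diff:

" minimal g:llama_config sketch (values illustrative, endpoint path assumed)
let g:llama_config = {
    \ 'endpoint':        'http://127.0.0.1:8012/infill',
    \ 'ring_n_chunks':   16,
    \ 'ring_chunk_size': 64,
    \ 'ring_update_ms':  1000,
    \ }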
@@ -154,6 +156,8 @@ function! llama#init()
    endif
endfunction

+ " compute how similar two chunks of text are
+ " 0 - no similarity, 1 - high similarity
" TODO: figure out something better
function! s:chunk_sim(c0, c1)
    let l:lines0 = len(a:c0)
@@ -173,17 +177,23 @@ function! s:chunk_sim(c0, c1)
    return 2.0 * l:common / (l:lines0 + l:lines1)
endfunction
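The hunk above elides the middle of s:chunk_sim. Based on the Dice-style formula on its last line, a plausible reconstruction counts the lines of one chunk that also occur in the other; this is a sketch of that idea, not the actual elided code:

" illustrative stand-in for the elided body of s:chunk_sim
function! s:chunk_sim_sketch(c0, c1)
    let l:lines0 = len(a:c0)
    let l:lines1 = len(a:c1)

    " count lines of c0 that also appear in c1
    let l:common = 0
    for l:line0 in a:c0
        if index(a:c1, l:line0) >= 0
            let l:common += 1
        endif
    endfor

    " Dice-style coefficient: 0 - no shared lines, 1 - identical line sets
    return 2.0 * l:common / (l:lines0 + l:lines1)
endfunction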

- " pick a chunk from the provided text and queue it for processing
+ " pick a random chunk of size g:llama_config.ring_chunk_size from the provided text and queue it for processing
+ "
+ " no_mod   - do not pick chunks from buffers with pending changes
+ " do_evict - evict chunks that are very similar to the new one
+ "
function! s:pick_chunk(text, no_mod, do_evict)
    " do not pick chunks from buffers with pending changes or buffers that are not files
    if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
        return
    endif

+   " if the extra context option is disabled - do nothing
    if g:llama_config.ring_n_chunks <= 0
        return
    endif

+   " don't pick very small chunks
    if len(a:text) < 3
        return
    endif
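Between this guard and the eviction pass below, the diff skips the part that actually extracts the random chunk. Per the updated comment above, it plausibly does something like the following; s:rand is a hypothetical helper and the exact bounds are guesses:

" sketch of the elided chunk extraction (helper s:rand is hypothetical)
let l:chunk_size = g:llama_config.ring_chunk_size

if len(a:text) < l:chunk_size
    " the whole text fits in one chunk
    let l:chunk = a:text
else
    " pick a random window of chunk_size lines
    let l:l0 = s:rand(0, len(a:text) - l:chunk_size)
    let l:chunk = a:text[l:l0 : l:l0 + l:chunk_size - 1]
endif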
@@ -220,9 +230,9 @@ function! s:pick_chunk(text, no_mod, do_evict)
        return
    endif

-   " evict chunks that are very similar to the new one
+   " evict queued chunks that are very similar to the new one
    for i in range(len(s:ring_queued) - 1, 0, -1)
-       if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.5
+       if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.9
            if a:do_evict
                call remove(s:ring_queued, i)
                let s:ring_n_evict += 1
@@ -234,7 +244,7 @@ function! s:pick_chunk(text, no_mod, do_evict)

    " also from s:ring_chunks
    for i in range(len(s:ring_chunks) - 1, 0, -1)
-       if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.5
+       if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
            if a:do_evict
                call remove(s:ring_chunks, i)
                let s:ring_n_evict += 1
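Worked example of what the raised threshold means: for two 64-line chunks, similarity above 0.9 requires roughly 58 shared lines (2·58/128 ≈ 0.91), whereas the old 0.5 threshold already evicted at 32 shared lines (2·32/128 = 0.5). Only near-duplicates get evicted now, so moderately overlapping chunks can coexist in the ring.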
@@ -244,6 +254,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
        endif
    endfor

+   " TODO: become parameter?
    if len(s:ring_queued) == 16
        call remove(s:ring_queued, 0)
    endif
@@ -253,7 +264,8 @@ function! s:pick_chunk(text, no_mod, do_evict)
    " let &statusline = 'extra context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued)
endfunction
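The callers of s:pick_chunk sit outside the hunks shown here. One plausible wiring, purely for illustration (the event choices and line ranges are assumptions, not taken from this diff):

" queue surrounding lines when entering a buffer; queue yanked text on yank
autocmd BufEnter     * call s:pick_chunk(getline(max([1, line('.') - 64]), line('.') + 64), v:true, v:true)
autocmd TextYankPost * call s:pick_chunk(v:event.regcontents, v:false, v:true)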

- " called every g:llama_config.ring_update_ms, processed chunks are moved to s:ring_chunks
+ " picks a queued chunk, sends it for processing and adds it to s:ring_chunks
+ " called every g:llama_config.ring_update_ms
function! s:ring_update()
    call timer_start(g:llama_config.ring_update_ms, {-> s:ring_update()})
@@ -306,15 +318,21 @@ function! s:ring_update()
        \ g:llama_config.endpoint, shellescape(l:request)
        \ )

+   " no callbacks because we don't need to process the response
    call jobstart(l:curl_command, {})
endfunction
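The diff does not show how l:request is assembled. One way it could look, assuming the ring chunks travel to the server in the JSON body of the background request (the field names below are illustrative guesses, not confirmed by this diff):

" sketch of the background request body (field names illustrative)
let l:extra_context = []
for l:chunk in s:ring_chunks
    call add(l:extra_context, {'text': join(l:chunk.data, "\n")})
endfor

let l:request = json_encode({
    \ 'input_prefix': '',
    \ 'input_suffix': '',
    \ 'input_extra':  l:extra_context,
    \ 'prompt':       '',
    \ 'n_predict':    1,
    \ 'cache_prompt': v:true
    \ })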

+ " necessary for 'inoremap <expr>'
function! llama#fim_inline(is_auto, on_hold) abort
    call llama#fim(a:is_auto, a:on_hold)
    return ''
endfunction
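The empty return value is what makes the <expr> mapping a no-op in the buffer. A mapping along these lines would trigger a manual FIM request from insert mode (the key choice here is illustrative):

" trigger FIM manually; the returned '' means nothing is inserted
inoremap <expr> <C-F> llama#fim_inline(v:false, v:false)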

+ " the main FIM call
+ " takes local context around the cursor and sends it together with the extra context
+ " to the llama.cpp server for completion
function! llama#fim(is_auto, on_hold) abort
+   " we already have a suggestion for the current cursor position
    if a:on_hold && (s:hint_shown || (s:pos_x == col('.') - 1 && s:pos_y == line('.')))
        return
    endif
@@ -415,6 +433,7 @@ function! llama#fim(is_auto, on_hold) abort
    " TODO: per-file location
    let l:delta_y = abs(s:pos_y - s:pos_y_pick)

+   " gather some extra context nearby and process it in the background
    " only gather chunks if the cursor has moved a lot
    " TODO: something more clever? reranking?
    if a:is_auto && l:delta_y > 32
@@ -474,7 +493,7 @@ function! s:on_move()
    call llama#fim_cancel()
endfunction

- " callback that processes the result from the server
+ " callback that processes the FIM result from the server and displays the suggestion
function! s:fim_on_stdout(job_id, data, event) dict
    let l:raw = join(a:data, "\n")
    if len(l:raw) == 0
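The callback is cut off here. A sketch of how the suggestion could be extracted from the payload, assuming the server replies with a JSON object whose 'content' field holds the completion (an assumption, the field name is not shown in this diff):

" decode the response and pull out the suggested text ('content' is assumed)
let l:response = json_decode(l:raw)
let l:content  = get(l:response, 'content', '')
if empty(l:content)
    return
endif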