+ " LLM-based code completion using llama.cpp
+ "
+ " requires:
+ "   - neovim
+ "   - llama.cpp server instance
+ "
" sample config:
"
- "   - Ctrl+F    - trigger FIM completion manually
+ "   - Tab       - accept the current suggestion
+ "   - Shift+Tab - accept just the first line of the suggestion
+ "   - Ctrl+F    - trigger FIM completion manually
+ "
+ " make a symlink or copy this file to ~/.config/nvim/autoload/llama.vim
+ "
+ " start the llama.cpp server with a FIM-compatible model, for example:
+ "
+ "   llama-server -m {model.gguf} --port 8012 -ngl 99 -fa -ub 1024 -b 2048
+ "
+ " adjust the batch size to control how much of the provided context is used during inference
+ " lower values use a smaller part of the context, which results in faster processing
"
- " run this once to initialise the plugin:
+ " run this once to initialise llama.vim:
"
- "   :call llama#init()
+ "   :call llama#init()
"

" color of the suggested text
highlight llama_hl_hint guifg=#ff772f
highlight llama_hl_info guifg=#77ff2f

+ " endpoint:         llama.cpp server endpoint
+ " n_prefix:         number of lines before the cursor to include in the prefix
+ " n_suffix:         number of lines after the cursor to include in the suffix
+ " n_predict:        max number of tokens to predict
+ " t_max_prompt_ms:  max allotted time for processing the prompt
+ " t_max_predict_ms: max allotted time for the prediction
+ " show_info:        show extra info about the inference
+ " auto_fim:         trigger FIM completion automatically on cursor movement
let s:default_config = {
    \ 'endpoint':         'http://127.0.0.1:8012/infill',
    \ 'n_prefix':         128,
    \ 'n_suffix':         128,
    \ 'n_predict':        64,
    \ 't_max_prompt_ms':  300,
    \ 't_max_predict_ms': 200,
+   \ 'show_info':        v:true,
    \ 'auto_fim':         v:true,
-   \ 'stop':             ["\n"]
    \ }

let g:llama_config = get(g:, 'llama_config', s:default_config)
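Note that g:llama_config is read with get() and falls back to s:default_config as a whole, so a user-supplied dictionary replaces the defaults rather than being merged with them. A minimal sketch of an override in ~/.config/nvim/init.vim, using the keys documented above (the values are illustrative):

    " must be set before llama#init() is first called
    let g:llama_config = {
        \ 'endpoint':         'http://127.0.0.1:8012/infill',
        \ 'n_prefix':         256,
        \ 'n_suffix':         256,
        \ 'n_predict':        64,
        \ 't_max_prompt_ms':  300,
        \ 't_max_predict_ms': 200,
        \ 'show_info':        v:true,
        \ 'auto_fim':         v:false,
        \ }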

function! llama#init()
-     let s:pos_x = 0
+     let s:pos_x = 0 " cursor position upon start of completion
    let s:pos_y = 0
    let s:pos_x0 = 0 " pos_x corrected for end-of-line edge case
@@ -46,8 +70,8 @@ function! llama#init()
    augroup llama
        autocmd!
-         autocmd InsertEnter    * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
-         autocmd InsertLeave    * call llama#fim_cancel()
+         autocmd InsertEnter    * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
+         autocmd InsertLeavePre * call llama#fim_cancel()

        autocmd CursorMoved * call llama#fim_cancel()
    augroup END
@@ -90,7 +114,6 @@ function! llama#fim(is_auto) abort
        \ 'prompt':         "",
        \ 'input_prefix':   l:prefix,
        \ 'input_suffix':   l:suffix,
-         "\ 'stop':          g:llama_config.stop,
        \ 'n_predict':      g:llama_config.n_predict,
        \ 'penalty_last_n': 0,
        \ 'top_k':          100,
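For reference, this dictionary becomes the JSON body of the request sent to the configured /infill endpoint, so the same completion can be reproduced from the shell; a rough hand-written equivalent (field values are illustrative) would be:

    curl -s http://127.0.0.1:8012/infill -H "Content-Type: application/json" -d '{"prompt": "", "input_prefix": "def add(a, b):\n    ", "input_suffix": "\n", "n_predict": 64}'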
@@ -126,16 +149,23 @@ function! llama#fim(is_auto) abort
    endif
endfunction

- function! llama#fim_accept()
+ " if first_line == v:true accept only the first line of the response
+ function! llama#fim_accept(first_line)
    " insert the suggestion at the cursor location
    if s:can_accept && len(s:content) > 0
        call setline(s:pos_y, s:line_cur[:(s:pos_x0 - 1)] . s:content[0])
        if len(s:content) > 1
-             call append(s:pos_y, s:content[1:-1])
+             if !a:first_line
+                 call append(s:pos_y, s:content[1:-1])
+             endif
        endif

        " move the cursor to the end of the accepted text
-         call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
+         if !a:first_line
+             call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
+         else
+             call cursor(s:pos_y, s:pos_x + len(s:content[0]) - 1)
+         endif
    endif

    call llama#fim_cancel()
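To make the two acceptance paths concrete with a hypothetical suggestion: if s:content is ['foo(', '    bar,', ')'], then <Tab> (first_line == v:false) writes all three lines into the buffer, appending the last two below the current line, and moves the cursor to the end of the inserted text, while <S-Tab> (first_line == v:true) writes only 'foo(' and leaves the cursor at the end of that first line.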
@@ -146,6 +176,11 @@ function! llama#fim_cancel()
        call jobstop(s:current_job)
    endif

+     if s:timer_fim != -1
+         call timer_stop(s:timer_fim)
+         let s:timer_fim = -1
+     endif
+
    " clear the virtual text
    let l:bufnr = bufnr('%')
@@ -155,7 +190,9 @@ function! llama#fim_cancel()
    call nvim_buf_clear_namespace(l:bufnr, l:id_vt_fim, 0, -1)
    call nvim_buf_clear_namespace(l:bufnr, l:id_vt_info, 0, -1)

+     " remove the mappings
    silent! iunmap <buffer> <Tab>
+     silent! iunmap <buffer> <S-Tab>
    silent! iunmap <buffer> <Esc>

    augroup llama_insert
@@ -173,6 +210,8 @@ function! s:fim_auto_enable()
    augroup END
endfunction

+ " auto-start a fim job a short time after the cursor has moved
+ " if there is already a job queued - cancel it
function! s:fim_auto()
    if s:current_job != v:null
        call jobstop(s:current_job)
@@ -189,7 +228,7 @@ function! s:fim_auto()
    let s:timer_fim = timer_start(500, {-> llama#fim(v:true)})
endfunction

-
+ " callback that processes the result from the server
function! s:fim_on_stdout(job_id, data, event) dict
    let l:raw = join(a:data, "\n")
    if len(l:raw) == 0
@@ -199,6 +238,13 @@ function! s:fim_on_stdout(job_id, data, event) dict
    let s:can_accept = v:true
    let l:has_info   = v:false

+     if s:can_accept && v:shell_error
+         if !self.is_auto
+             call add(s:content, "<| curl error: is the server on? |>")
+         endif
+         let s:can_accept = v:false
+     endif
+
    let l:n_prompt    = 0
    let l:t_prompt_ms = 1.0
    let l:s_prompt    = 0
@@ -207,13 +253,6 @@ function! s:fim_on_stdout(job_id, data, event) dict
    let l:t_predict_ms = 1.0
    let l:s_predict    = 0

-     if s:can_accept && v:shell_error
-         if !self.is_auto
-             call add(s:content, "<| curl error: is the server on? |>")
-         endif
-         let s:can_accept = v:false
-     endif
-
    " get the generated suggestion
    if s:can_accept
        let l:response = json_decode(l:raw)
@@ -227,7 +266,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
            call remove(s:content, -1)
        endwhile

-         " if response.timings
+         " if response.timings is available
        if len(get(l:response, 'timings', {})) > 0
            let l:has_info = v:true
            let l:timings  = get(l:response, 'timings', {})
@@ -264,8 +303,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
    let l:id_vt_fim  = nvim_create_namespace('vt_fim')
    let l:id_vt_info = nvim_create_namespace('vt_info')

-     " construct the info message:
-     if l:has_info
+     " construct the info message and display it to the right of the current line
+     if g:llama_config.show_info && l:has_info
        " prefix the info string with whitespace in order to offset it to the right of the fim overlay
        let l:prefix = repeat(' ', len(s:content[0]) - len(s:line_cur_suffix) + 3)
@@ -282,6 +321,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
            \ })
    endif

+     " display the suggestion
    call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, s:pos_y - 1, s:pos_x - 1, {
        \ 'virt_text': [[s:content[0], 'llama_hl_hint']],
        \ 'virt_text_win_col': virtcol('.') - 1
@@ -293,8 +333,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
        \ })

    " setup accept/cancel events
-     inoremap <buffer> <Tab>   <C-O>:call llama#fim_accept()<CR>
-     inoremap <buffer> <Esc>   <C-O>:call llama#fim_cancel()<CR><Esc>
+     inoremap <buffer> <Tab>   <C-O>:call llama#fim_accept(v:false)<CR>
+     inoremap <buffer> <S-Tab> <C-O>:call llama#fim_accept(v:true)<CR>

    augroup llama_insert
        autocmd!
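Putting the header instructions together, a minimal end-to-end setup might look like the following sketch (the path to the llama.cpp checkout is hypothetical, and calling llama#init() from init.vim is just one way to run it once per session):

    ln -sf ~/src/llama.cpp/examples/llama.vim ~/.config/nvim/autoload/llama.vim

and in ~/.config/nvim/init.vim:

    call llama#init()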