
Commit 949c928

llama.vim : wip [no ci]
1 parent 48efee1 · commit 949c928

1 file changed (+70, -124 lines)

examples/llama.vim
@@ -1,135 +1,81 @@
-" Requires an already running llama.cpp server
-" To install either copy or symlink to ~/.vim/autoload/llama.vim
-" Then start with either :call llama#doLlamaGen(),
-" or add a keybind to your vimrc such as
-" nnoremap Z :call llama#doLlamaGen()<CR>
-" Similarly, you could add an insert mode keybind with
-" inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
+" sample config:
 "
-" g:llama_api_url, g:llama_api_key and g:llama_overrides can be configured in your .vimrc
-" let g:llama_api_url = "192.168.1.10:8080"
-" llama_overrides can also be set through buffer/window scopes. For instance
-" autocmd filetype python let b:llama_overrides = {"temp": 0.2}
-" Could be added to your .vimrc to automatically set a lower temperature when
-" editing a python script
-" Additionally, an override dict can be stored at the top of a file
-" !*{"stop": ["User:"]}
-" Could be added to the start of your chatlog.txt to set the stopping token
-" These parameter dicts are merged together from lowest to highest priority:
-" server default -> g:llama_overrides -> w:llama_overrides ->
-" b:llama_overrides -> in file (!*) overrides
+" - Ctrl+F - trigger FIM completion
+"
+" copy paste this in your .vimrc:
+"
+"augroup llama_cpp
+" autocmd!
+" autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <Esc>:call llama#fim()<CR>
+"augroup END
 "
-" Sublists (like logit_bias and stop) are overridden, not merged
-" Example override:
-" !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
-if !exists("g:llama_api_url")
-    let g:llama_api_url= "127.0.0.1:8080"
-endif
-if !exists("g:llama_overrides")
-    let g:llama_overrides = {}
-endif
-const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
-const s:curlcommand = ['curl','--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
-let s:linedict = {}
 
-func s:callbackHandler(bufn, channel, msg)
-    if len(a:msg) < 3
-        return
-    elseif a:msg[0] == "d"
-        let l:msg = a:msg[6:-1]
-    else
-        let l:msg = a:msg
-    endif
-    let l:decoded_msg = json_decode(l:msg)
-    let l:newtext = split(l:decoded_msg['content'], "\n", 1)
-    if len(l:newtext) > 0
-        call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. newtext[0])
-    else
-        echo "nothing genned"
-    endif
-    if len(newtext) > 1
-        let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], newtext[1:-1])
-        let s:linedict[a:bufn] = s:linedict[a:bufn] + len(newtext)-1
-    endif
-    if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
-        echo "Finished generation"
-    endif
-endfunction
+let s:default_config = {
+    \ 'prefix_lines': 32,
+    \ 'suffix_lines': 32,
+    \ 'endpoint': 'http://127.0.0.1:8012/infill',
+    \ 'stop': ["\n"],
+    \ 'n_predict': 64,
+    \ 'n_probs': 3,
+    \ 'temperature': 0.1
+    \}
 
-func llama#doLlamaGen()
-    if exists("b:job")
-        if job_status(b:job) == "run"
-            call job_stop(b:job)
-            return
-        endif
-    endif
+let g:llama_config = get(g:, 'llama_config', s:default_config)
 
-    let l:cbuffer = bufnr("%")
-    let s:linedict[l:cbuffer] = line('$')
-    let l:buflines = getbufline(l:cbuffer, 1, 1000)
-    let l:querydata = copy(s:querydata)
-    call extend(l:querydata, g:llama_overrides)
-    if exists("w:llama_overrides")
-        call extend(l:querydata, w:llama_overrides)
-    endif
-    if exists("b:llama_overrides")
-        call extend(l:querydata, b:llama_overrides)
-    endif
-    if l:buflines[0][0:1] == '!*'
-        let l:userdata = json_decode(l:buflines[0][2:-1])
-        call extend(l:querydata, l:userdata)
-        let l:buflines = l:buflines[1:-1]
-    endif
-    let l:querydata.prompt = join(l:buflines, "\n")
-    let l:curlcommand = copy(s:curlcommand)
-    if exists("g:llama_api_key")
-        call extend(l:curlcommand, ['--header', 'Authorization: Bearer ' .. g:llama_api_key])
-    endif
-    let l:curlcommand[2] = json_encode(l:querydata)
-    let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
-endfunction
+function! llama#fim() abort
+    let l:lines_prefix = getline(max([1, line('.') - g:llama_config.suffix_lines]), line('.') - 1)
+    let l:lines_suffix = getline(line('.') + 1, min([line('$'), line('.') + g:llama_config.prefix_lines]))
 
-" Echos the tokkenization of the provided string , or cursor to end of word
-" Onus is placed on the user to include the preceding space
-func llama#tokenizeWord(...)
-    if (a:0 > 0)
-        let l:input = a:1
-    else
-        exe "normal \"*ye"
-        let l:input = @*
-    endif
-    let l:querydata = {"content": l:input}
-    let l:curlcommand = copy(s:curlcommand)
-    let l:curlcommand[2] = json_encode(l:querydata)
-    let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
-    let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
-endfunction
+    let l:cursor_col = col('.')
 
-func s:tokenizeWordCallback(plaintext, channel, msg)
-    echo '"' .. a:plaintext ..'" - ' .. string(json_decode(a:msg).tokens)
-endfunction
+    let l:line_cur = getline('.')
+    let l:line_cur_prefix = strpart(l:line_cur, 0, l:cursor_col)
+    let l:line_cur_suffix = strpart(l:line_cur, l:cursor_col)
 
+    let l:prefix = ""
+        \ . join(l:lines_prefix, "\n")
+        \ . "\n"
+        \ . l:line_cur_prefix
 
-" Echos the token count of the entire buffer (or provided string)
-" Example usage :echo llama#tokenCount()
-func llama#tokenCount(...)
-    if (a:0 > 0)
-        let l:buflines = a:1
-    else
-        let l:buflines = getline(1,1000)
-        if l:buflines[0][0:1] == '!*'
-            let l:buflines = l:buflines[1:-1]
-        endif
-        let l:buflines = join(l:buflines, "\n")
-    endif
-    let l:querydata = {"content": l:buflines}
-    let l:curlcommand = copy(s:curlcommand)
-    let l:curlcommand[2] = json_encode(l:querydata)
-    let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
-    let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
-endfunction
+    let l:suffix = ""
+        \ . l:line_cur_suffix
+        \ . join(l:lines_suffix, "\n")
+
+    let l:request = json_encode({
+        \ 'prompt': "",
+        \ 'input_prefix': l:prefix,
+        \ 'input_suffix': l:suffix,
+        "\ 'stop': g:llama_config.stop,
+        \ 'n_predict': g:llama_config.n_predict,
+        "\ 'n_probs': g:llama_config.n_probs,
+        \ 'penalty_last_n': 0,
+        \ 'temperature': g:llama_config.temperature,
+        \ 'top_k': 10,
+        \ 'stream': v:false,
+        \ 'samplers': ["top_k"]
+        \ })
+
+    " request completion from the server
+    let l:curl_command = printf(
+        \ "curl --silent --no-buffer --request POST --url %s --header \"Content-Type: application/json\" --data %s",
+        \ g:llama_config.endpoint, shellescape(l:request)
+        \ )
+
+    let l:response = json_decode(system(l:curl_command))
+
+    echom l:response
+
+    let l:content = []
+    for l:part in split(get(l:response, 'content', ''), "\n", 1)
+        call add(l:content, l:part)
+    endfor
+
+    echom l:content
+
+    " insert the 'content' at the current cursor location
+    let l:content[0] = l:line_cur_prefix . l:content[0]
+    let l:content[-1] .= l:line_cur_suffix
 
-func s:tokenCountCallback(channel, msg)
-    let resp = json_decode(a:msg)
-    echo len(resp.tokens)
+    call setline('.', l:content[0])
+    call append (line('.'), l:content[1:-1])
 endfunction
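
For reference, a sketch of the .vimrc setup this commit expects, assembled from the header comments and s:default_config above. Note that the script reads the user config with get(g:, 'llama_config', s:default_config), so a dict set in .vimrc replaces the defaults wholesale rather than being merged with them; the values below simply restate the defaults and are only illustrative.

" sketch only: restates the defaults from s:default_config in this commit
let g:llama_config = {
    \ 'prefix_lines': 32,
    \ 'suffix_lines': 32,
    \ 'endpoint': 'http://127.0.0.1:8012/infill',
    \ 'stop': ["\n"],
    \ 'n_predict': 64,
    \ 'n_probs': 3,
    \ 'temperature': 0.1
    \}

" mapping from the header comment: Ctrl+F triggers FIM completion in insert mode
augroup llama_cpp
    autocmd!
    autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <Esc>:call llama#fim()<CR>
augroup END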

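The HTTP request that llama#fim() assembles via printf() and system() can also be reproduced by hand, which is a quick way to check the server side before wiring up the mapping. A rough shell equivalent, assuming a llama.cpp server reachable at the default endpoint from s:default_config; the input_prefix/input_suffix values are made-up placeholders, while the parameter names are exactly the ones serialized by json_encode() in llama#fim():

# sketch: hand-built version of the request llama#fim() sends to /infill
curl --silent --no-buffer --request POST \
    --url http://127.0.0.1:8012/infill \
    --header "Content-Type: application/json" \
    --data '{
        "prompt": "",
        "input_prefix": "def add(a, b):\n    ",
        "input_suffix": "\n\nprint(add(1, 2))\n",
        "n_predict": 64,
        "penalty_last_n": 0,
        "temperature": 0.1,
        "top_k": 10,
        "stream": false,
        "samplers": ["top_k"]
    }'

The "content" field of the response holds the completion text, which the function splits on newlines and inserts at the cursor with setline() and append().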