Skip to content

Commit 98e4186

Browse files
committed
Merge branch 'master' into qkv
2 parents 32dc2d5 + 190a37d commit 98e4186

File tree

6 files changed

+420
-69
lines changed

6 files changed

+420
-69
lines changed

examples/llama.vim

+128-42
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
"
33
" requires:
44
"
5-
" - neovim
5+
" - neovim or vim
66
" - curl
77
" - llama.cpp server instance
88
" - FIM-compatible model
99
"
1010
" sample config:
1111
"
1212
" - Tab - accept the current suggestion
13-
" - Shift+Tab - accept just the first line of the segguestion
13+
" - Shift+Tab - accept just the first line of the suggestion
1414
" - Ctrl+F - toggle FIM completion manually
1515
"
1616
" make symlink or copy this file to ~/.config/nvim/autoload/llama.vim
@@ -43,8 +43,8 @@
4343
"
4444

4545
" colors (adjust to your liking)
46-
highlight llama_hl_hint guifg=#ff772f
47-
highlight llama_hl_info guifg=#77ff2f
46+
highlight llama_hl_hint guifg=#ff772f ctermfg=202
47+
highlight llama_hl_info guifg=#77ff2f ctermfg=119
4848

4949
" general parameters:
5050
"
@@ -81,7 +81,7 @@ let s:default_config = {
8181
\ 'n_suffix': 64,
8282
\ 'n_predict': 128,
8383
\ 't_max_prompt_ms': 500,
84-
\ 't_max_predict_ms': 1000,
84+
\ 't_max_predict_ms': 3000,
8585
\ 'show_info': 2,
8686
\ 'auto_fim': v:true,
8787
\ 'max_line_suffix': 8,
@@ -93,6 +93,18 @@ let s:default_config = {
9393

9494
let g:llama_config = get(g:, 'llama_config', s:default_config)
9595

96+
" Compute an indent amount from the leading tab characters of a:str.
"
" Scans a:str from its first character and, for every consecutive leading
" tab, adds (&tabstop - 1) to a running count; scanning stops at the first
" character that is not a tab. Spaces are not counted. An empty string
" yields 0.
"
" The result is passed as 'text_padding_left' when the extra suggestion
" lines are added as text properties in the Vim (non-Neovim) code path.
" NOTE(review): the "- 1" is presumably there because the tab character
" itself already takes up one cell in the displayed text - confirm.
"
" a:str   - the line of suggestion text to inspect
" returns - Number, the accumulated count
function! s:get_indent(str)
97+
let l:count = 0
98+
for i in range(len(a:str))
99+
if a:str[i] == "\t"
100+
let l:count += &tabstop - 1
101+
else
102+
break
103+
endif
104+
endfor
105+
return l:count
106+
endfunction
107+
96108
" Return a:i0 plus rand() taken modulo the size of the inclusive span
" (a:i1 - a:i0 + 1).
" NOTE(review): presumably meant to yield an integer in [a:i0, a:i1]; this
" assumes a:i1 >= a:i0 and a non-negative rand() result - confirm against
" callers.
function! s:rand(i0, i1) abort
97109
return a:i0 + rand() % (a:i1 - a:i0 + 1)
98110
endfunction
@@ -129,6 +141,21 @@ function! llama#init()
129141

130142
let s:current_job = v:null
131143

144+
let s:ghost_text_nvim = exists('*nvim_buf_get_mark')
145+
let s:ghost_text_vim = has('textprop')
146+
147+
if s:ghost_text_vim
148+
let s:hlgroup_hint = 'llama_hl_hint'
149+
let s:hlgroup_info = 'llama_hl_info'
150+
151+
if empty(prop_type_get(s:hlgroup_hint))
152+
call prop_type_add(s:hlgroup_hint, {'highlight': s:hlgroup_hint})
153+
endif
154+
if empty(prop_type_get(s:hlgroup_info))
155+
call prop_type_add(s:hlgroup_info, {'highlight': s:hlgroup_info})
156+
endif
157+
endif
158+
132159
augroup llama
133160
autocmd!
134161
autocmd InsertEnter * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false)
@@ -317,13 +344,22 @@ function! s:ring_update()
317344
\ 't_max_predict_ms': 1
318345
\ })
319346

320-
let l:curl_command = printf(
321-
\ "curl --silent --no-buffer --request POST --url %s --header \"Content-Type: application/json\" --data %s",
322-
\ g:llama_config.endpoint, shellescape(l:request)
323-
\ )
347+
let l:curl_command = [
348+
\ "curl",
349+
\ "--silent",
350+
\ "--no-buffer",
351+
\ "--request", "POST",
352+
\ "--url", g:llama_config.endpoint,
353+
\ "--header", "Content-Type: application/json",
354+
\ "--data", l:request
355+
\ ]
324356

325357
" no callbacks because we don't need to process the response
326-
call jobstart(l:curl_command, {})
358+
if s:ghost_text_nvim
359+
call jobstart(l:curl_command, {})
360+
elseif s:ghost_text_vim
361+
call job_start(l:curl_command, {})
362+
endif
327363
endfunction
328364

329365
" necessary for 'inoremap <expr>'
@@ -418,24 +454,37 @@ function! llama#fim(is_auto) abort
418454
\ 't_max_predict_ms': g:llama_config.t_max_predict_ms
419455
\ })
420456

421-
let l:curl_command = printf(
422-
\ "curl --silent --no-buffer --request POST --url %s --header \"Content-Type: application/json\" --data %s",
423-
\ g:llama_config.endpoint, shellescape(l:request)
424-
\ )
457+
let l:curl_command = [
458+
\ "curl",
459+
\ "--silent",
460+
\ "--no-buffer",
461+
\ "--request", "POST",
462+
\ "--url", g:llama_config.endpoint,
463+
\ "--header", "Content-Type: application/json",
464+
\ "--data", l:request
465+
\ ]
425466

426467
if s:current_job != v:null
427-
call jobstop(s:current_job)
468+
if s:ghost_text_nvim
469+
call jobstop(s:current_job)
470+
elseif s:ghost_text_vim
471+
call job_stop(s:current_job)
472+
endif
428473
endif
429474

430475
" send the request asynchronously
431-
let s:current_job = jobstart(l:curl_command, {
432-
\ 'on_stdout': function('s:fim_on_stdout'),
433-
\ 'on_exit': function('s:fim_on_exit'),
434-
\ 'stdout_buffered': v:true,
435-
\ 'pos_x': s:pos_x,
436-
\ 'pos_y': s:pos_y,
437-
\ 'is_auto': a:is_auto
438-
\ })
476+
if s:ghost_text_nvim
477+
let s:current_job = jobstart(l:curl_command, {
478+
\ 'on_stdout': function('s:fim_on_stdout', [s:pos_x, s:pos_y, a:is_auto]),
479+
\ 'on_exit': function('s:fim_on_exit'),
480+
\ 'stdout_buffered': v:true
481+
\ })
482+
elseif s:ghost_text_vim
483+
let s:current_job = job_start(l:curl_command, {
484+
\ 'out_cb': function('s:fim_on_stdout', [s:pos_x, s:pos_y, a:is_auto]),
485+
\ 'exit_cb': function('s:fim_on_exit')
486+
\ })
487+
endif
439488

440489
" TODO: per-file location
441490
let l:delta_y = abs(s:pos_y - s:pos_y_pick)
@@ -482,9 +531,13 @@ function! llama#fim_cancel()
482531
" clear the virtual text
483532
let l:bufnr = bufnr('%')
484533

485-
let l:id_vt_fim = nvim_create_namespace('vt_fim')
486-
487-
call nvim_buf_clear_namespace(l:bufnr, l:id_vt_fim, 0, -1)
534+
if s:ghost_text_nvim
535+
let l:id_vt_fim = nvim_create_namespace('vt_fim')
536+
call nvim_buf_clear_namespace(l:bufnr, l:id_vt_fim, 0, -1)
537+
elseif s:ghost_text_vim
538+
call prop_remove({'type': s:hlgroup_hint, 'all': v:true})
539+
call prop_remove({'type': s:hlgroup_info, 'all': v:true})
540+
endif
488541

489542
" remove the mappings
490543
silent! iunmap <buffer> <Tab>
@@ -499,13 +552,18 @@ function! s:on_move()
499552
endfunction
500553

501554
" callback that processes the FIM result from the server and displays the suggestion
502-
function! s:fim_on_stdout(job_id, data, event) dict
503-
let l:raw = join(a:data, "\n")
555+
function! s:fim_on_stdout(pos_x, pos_y, is_auto, job_id, data, event = v:null)
556+
if s:ghost_text_nvim
557+
let l:raw = join(a:data, "\n")
558+
elseif s:ghost_text_vim
559+
let l:raw = a:data
560+
endif
561+
504562
if len(l:raw) == 0
505563
return
506564
endif
507565

508-
if self.pos_x != col('.') - 1 || self.pos_y != line('.')
566+
if a:pos_x != col('.') - 1 || a:pos_y != line('.')
509567
return
510568
endif
511569

@@ -514,14 +572,14 @@ function! s:fim_on_stdout(job_id, data, event) dict
514572
return
515573
endif
516574

517-
let s:pos_x = self.pos_x
518-
let s:pos_y = self.pos_y
575+
let s:pos_x = a:pos_x
576+
let s:pos_y = a:pos_y
519577

520578
let s:can_accept = v:true
521579
let l:has_info = v:false
522580

523581
if s:can_accept && v:shell_error
524-
if !self.is_auto
582+
if !a:is_auto
525583
call add(s:content, "<| curl error: is the server on? |>")
526584
endif
527585
let s:can_accept = v:false
@@ -642,7 +700,9 @@ function! s:fim_on_stdout(job_id, data, event) dict
642700
" display virtual text with the suggestion
643701
let l:bufnr = bufnr('%')
644702

645-
let l:id_vt_fim = nvim_create_namespace('vt_fim')
703+
if s:ghost_text_nvim
704+
let l:id_vt_fim = nvim_create_namespace('vt_fim')
705+
endif
646706

647707
" construct the info message
648708
if g:llama_config.show_info > 0 && l:has_info
@@ -671,15 +731,41 @@ function! s:fim_on_stdout(job_id, data, event) dict
671731
endif
672732

673733
" display the suggestion and append the info to the end of the first line
674-
call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, s:pos_y - 1, s:pos_x - 1, {
675-
\ 'virt_text': [[s:content[0], 'llama_hl_hint'], [l:info, 'llama_hl_info']],
676-
\ 'virt_text_win_col': virtcol('.') - 1
677-
\ })
734+
if s:ghost_text_nvim
735+
call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, s:pos_y - 1, s:pos_x - 1, {
736+
\ 'virt_text': [[s:content[0], 'llama_hl_hint'], [l:info, 'llama_hl_info']],
737+
\ 'virt_text_win_col': virtcol('.') - 1
738+
\ })
678739

679-
call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, s:pos_y - 1, 0, {
680-
\ 'virt_lines': map(s:content[1:], {idx, val -> [[val, 'llama_hl_hint']]}),
681-
\ 'virt_text_win_col': virtcol('.')
682-
\ })
740+
call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, s:pos_y - 1, 0, {
741+
\ 'virt_lines': map(s:content[1:], {idx, val -> [[val, 'llama_hl_hint']]}),
742+
\ 'virt_text_win_col': virtcol('.')
743+
\ })
744+
elseif s:ghost_text_vim
745+
let l:new_suffix = s:content[0]
746+
if !empty(l:new_suffix)
747+
call prop_add(s:pos_y, s:pos_x + 1, {
748+
\ 'type': s:hlgroup_hint,
749+
\ 'text': l:new_suffix
750+
\ })
751+
endif
752+
for line in s:content[1:]
753+
call prop_add(s:pos_y, 0, {
754+
\ 'type': s:hlgroup_hint,
755+
\ 'text': line,
756+
\ 'text_padding_left': s:get_indent(line),
757+
\ 'text_align': 'below'
758+
\ })
759+
endfor
760+
if !empty(l:info)
761+
call prop_add(s:pos_y, 0, {
762+
\ 'type': s:hlgroup_info,
763+
\ 'text': l:info,
764+
\ 'text_padding_left': col('$'),
765+
\ 'text_wrap': 'truncate'
766+
\ })
767+
endif
768+
endif
683769

684770
" setup accept shortcuts
685771
inoremap <buffer> <Tab> <C-O>:call llama#fim_accept(v:false)<CR>
@@ -688,7 +774,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
688774
let s:hint_shown = v:true
689775
endfunction
690776

691-
function! s:fim_on_exit(job_id, exit_code, event) dict
777+
function! s:fim_on_exit(job_id, exit_code, event = v:null)
692778
if a:exit_code != 0
693779
echom "Job failed with exit code: " . a:exit_code
694780
endif

ggml/src/ggml-cuda.cu

-1
Original file line numberDiff line numberDiff line change
@@ -3153,7 +3153,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
31533153
case GGML_OP_ROPE:
31543154
return ggml_is_contiguous(op->src[0]);
31553155
case GGML_OP_IM2COL:
3156-
return op->src[0]->type == GGML_TYPE_F16;
31573156
case GGML_OP_POOL_2D:
31583157
case GGML_OP_SUM:
31593158
case GGML_OP_SUM_ROWS:

ggml/src/ggml-cuda/im2col.cu

+3-3
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
9191
const int64_t OH = is_2D ? dst->ne[2] : 1;
9292
const int64_t OW = dst->ne[1];
9393

94-
const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
95-
const int64_t batch = src1->ne[3];
96-
const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32
94+
const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
95+
const int64_t batch = src1->ne[is_2D ? 3 : 2];
96+
const size_t batch_offset = src1->nb[is_2D ? 3 : 2] / 4; // nb is byte offset, src is type float32
9797

9898
if(dst->type == GGML_TYPE_F16) {
9999
im2col_cuda_f16(src1_d, (half *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream);

0 commit comments

Comments
 (0)