@@ -2707,6 +2707,31 @@ def last_image_embed_free():
    def load_image(self, image_url: str) -> bytes:
        return self._load_image(image_url)

+    def _embed_image_bytes(self, image_bytes: bytes, n_threads_batch: int = 1):
+        if (
+            self._last_image_embed is not None
+            and self._last_image_hash is not None
+            and hash(image_bytes) == self._last_image_hash
+        ):
+            return self._last_image_embed
+        with suppress_stdout_stderr(disable=self.verbose):
+            # Free the previous image embed
+            if self._last_image_embed is not None:
+                self._llava_cpp.llava_image_embed_free(self._last_image_embed)
+                self._last_image_embed = None
+                self._last_image_hash = None
+            embed = self._llava_cpp.llava_image_embed_make_with_bytes(
+                self.clip_ctx,
+                n_threads_batch,
+                (ctypes.c_uint8 * len(image_bytes)).from_buffer(
+                    bytearray(image_bytes)
+                ),
+                len(image_bytes),
+            )
+            self._last_image_embed = embed
+            self._last_image_hash = hash(image_bytes)
+            return embed
+
    def __call__(
        self,
        *,
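
The hunk above hoists the old per-call closure into a `_embed_image_bytes` method that memoizes the most recent embedding: it keys on `hash(image_bytes)`, returns the cached pointer on a hit, and frees the previous `llava_image_embed` before computing a new one. A minimal sketch of that single-entry cache pattern in isolation (the names `SingleEntryCache`, `compute`, and `free` are illustrative, not part of the library):

```python
class SingleEntryCache:
    """Memoize only the most recent result, releasing the old one on miss."""

    def __init__(self, compute, free):
        self._compute = compute  # expensive call, e.g. an image-embed function
        self._free = free        # cleanup for a stale result, e.g. an embed-free function
        self._last_key = None
        self._last_value = None

    def get(self, data: bytes):
        key = hash(data)
        if self._last_value is not None and key == self._last_key:
            return self._last_value       # hit: same bytes as the previous call
        if self._last_value is not None:
            self._free(self._last_value)  # miss: release the stale result first
            self._last_value = None
            self._last_key = None
        self._last_value = self._compute(data)
        self._last_key = key
        return self._last_value
```
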
@@ -2769,30 +2794,9 @@ def __call__(
        )
        split_text = self.split_text_on_image_urls(text, image_urls)

-        def embed_image_bytes(image_bytes: bytes):
-            if (
-                self._last_image_embed is not None
-                and self._last_image_hash is not None
-                and hash(image_bytes) == self._last_image_hash
-            ):
-                return self._last_image_embed
-            with suppress_stdout_stderr(disable=self.verbose):
-                # Free the previous image embed
-                if self._last_image_embed is not None:
-                    self._llava_cpp.llava_image_embed_free(self._last_image_embed)
-                    self._last_image_embed = None
-                    self._last_image_hash = None
-                embed = self._llava_cpp.llava_image_embed_make_with_bytes(
-                    self.clip_ctx,
-                    llama.context_params.n_threads_batch,
-                    (ctypes.c_uint8 * len(image_bytes)).from_buffer(
-                        bytearray(image_bytes)
-                    ),
-                    len(image_bytes),
-                )
-                self._last_image_embed = embed
-                self._last_image_hash = hash(image_bytes)
-                return embed
+        if self.verbose:
+            print(text, file=sys.stderr)
+

        # Evaluate prompt
        llama.reset()
@@ -2809,7 +2813,7 @@ def embed_image_bytes(image_bytes: bytes):
                llama.eval(tokens)
            else:
                image_bytes = self.load_image(value)
-                embed = embed_image_bytes(image_bytes)
+                embed = self._embed_image_bytes(image_bytes, llama.context_params.n_threads_batch)
                if llama.n_tokens + embed.contents.n_image_pos > llama.n_ctx():
                    raise ValueError(
                        f"Prompt exceeds n_ctx: {llama.n_tokens + embed.contents.n_image_pos} > {llama.n_ctx()}"
@@ -3308,6 +3312,44 @@ class Llama3VisionAlphaChatHandler(Llava15ChatHandler):
Llama3VisionAlpha = Llama3VisionAlphaChatHandler


+class MiniCPMv26(Llava15ChatHandler):
+    DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
+
+    CHAT_FORMAT = (
+        "{% for message in messages %}"
+        "{% if loop.first and messages[0]['role'] != 'system' %}"
+        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+        "{% endif %}"
+        "<|im_start|>{{ message['role'] }}\n"
+        "{% if message['content'] is iterable %}"
+        "{% for content in message['content'] %}"
+        "{% if content.type == 'image_url' %}"
+        "{% if content.image_url is string %}"
+        "{{ content.image_url }}"
+        "{% endif %}"
+        "{% if content.image_url is mapping %}"
+        "{{ content.image_url.url }}"
+        "{% endif %}"
+        "{% endif %}"
+        "{% endfor %}"
+
+        "{% for content in message['content'] %}"
+        "{% if content.type == 'text' %}"
+        "{{ content.text }}"
+        "{% endif %}"
+        "{% endfor %}"
+        "{% endif %}"
+        "{% if message['content'] is string %}"
+        "{{ message['content'] }}"
+        "{% endif %}"
+        "<|im_end|>\n"
+        "{% endfor %}"
+        "{% if add_generation_prompt %}"
+        "<|im_start|>assistant\n"
+        "{% endif %}"
+    )
+
+
@register_chat_completion_handler("chatml-function-calling")
def chatml_function_calling(
    llama: llama.Llama,
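
For reference, `CHAT_FORMAT` strings such as the MiniCPMv26 template above are Jinja2 templates rendered over OpenAI-style message lists. A sketch of rendering it directly with `jinja2` (the library does this wiring internally; the message content here is illustrative):

```python
from jinja2 import Environment

# Assumes MiniCPMv26 from the diff above is importable.
template = Environment().from_string(MiniCPMv26.CHAT_FORMAT)
prompt = template.render(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "What is in this image?"},
            ],
        }
    ],
    add_generation_prompt=True,
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# https://example.com/cat.pngWhat is in this image?<|im_end|>
# <|im_start|>assistant
```

Note that the template emits image URLs and text back to back with no separator; `split_text_on_image_urls` later splits the rendered prompt on each URL so the image embedding can be evaluated in its place.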