Skip to content

Commit a664d7b

Browse files
authored
llava cmakelists (#10127)
See issue: #10096 Copy the approach used in the llama CMakeLists.txt to link custom ops into the binary: https://github.com/pytorch/executorch/blob/409447d75a1524c1acc8f8ea894c2e13dd723a79/examples/models/llama/CMakeLists.txt#L114 Test plan: Build: ``` cmake -DPYTHON_EXECUTABLE=python -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DCMAKE_PREFIX_PATH="/home/lfq/.conda/envs/executorch/lib/python3.10/site-packages" -Bcmake-out/examples/models/llava examples/models/llava cmake --build cmake-out/examples/models/llava/ -j8 --config Debug ``` Run: ``` cmake-out/examples/models/llava/llava_main --model_path=llava.pte --tokenizer_path=tokenizer.bin --image_path=image.pt --prompt="ASSISTANT:" --temperature=0 --seq_len=650 I 00:00:00.001282 executorch:cpuinfo_utils.cpp:62] Reading file /sys/devices/soc0/image_version I 00:00:00.001330 executorch:cpuinfo_utils.cpp:78] Failed to open midr file /sys/devices/soc0/image_version I 00:00:00.001353 executorch:cpuinfo_utils.cpp:91] Reading file /sys/devices/system/cpu/cpu0/regs/identification/midr_el1 I 00:00:00.001380 executorch:cpuinfo_utils.cpp:100] Failed to open midr file /sys/devices/system/cpu/cpu0/regs/identification/midr_el1 I 00:00:00.001390 executorch:cpuinfo_utils.cpp:116] CPU info and manual query on # of cpus dont match. 
I 00:00:00.001397 executorch:main.cpp:77] Resetting threadpool with num threads = 0 I 00:00:00.001412 executorch:multimodal_runner.h:45] Creating Multimodal LLM runner: model_path=llava.pte, tokenizer_path=tokenizer.bin I 00:00:00.025122 executorch:main.cpp:107] image size(0): 3, size(1): 240, size(2): 336 I 00:00:21.793359 executorch:llava_runner.cpp:142] RSS after loading model: 6123.457031 MiB (0 if unsupported) I 00:00:23.059576 executorch:text_prefiller.cpp:95] Prefill token result numel(): 32064 I 00:00:33.459186 executorch:llava_runner.cpp:166] RSS after prompt and image prefill: 6255.707031 MiB (0 if unsupported) ASSISTANT:I 00:00:33.948606 executorch:text_prefiller.cpp:95] Prefill token result numel(): 32064 image captures a basketball game in progress, with several players on the court. One player is in the middle of a dunk, while another player is attempting toPyTorchObserver {"prompt_tokens":616,"generated_tokens":33,"model_load_start_ms":1744415212709,"model_load_end_ms":1744415234476,"inference_start_ms":1744415234477,"inference_end_ms":1744415259787,"prompt_eval_end_ms":1744415246632,"first_token_ms":1744415246632,"aggregate_sampling_time_ms":2883588,"SCALING_FACTOR_UNITS_PER_SECOND":1000} I 00:00:47.103512 executorch:stats.h:104] Prompt Tokens: 616 Generated Tokens: 33 I 00:00:47.103520 executorch:stats.h:110] Model Load Time: 21.767000 (seconds) I 00:00:47.103528 executorch:stats.h:117] Total inference time: 25.310000 (seconds) Rate: 1.303832 (tokens/second) I 00:00:47.103533 executorch:stats.h:127] Prompt evaluation: 12.155000 (seconds) Rate: 50.678733 (tokens/second) I 00:00:47.103538 executorch:stats.h:136] Generated 33 tokens: 13.155000 (seconds) Rate: 2.508552 (tokens/second) I 00:00:47.103542 executorch:stats.h:147] Time to first generated token: 12.155000 (seconds) I 00:00:47.103545 executorch:stats.h:153] Sampling time over 649 tokens: 2883.588000 (seconds) I 00:00:47.103549 executorch:llava_runner.cpp:178] RSS after finishing text 
generation: 6255.707031 MiB (0 if unsupported) ```
1 parent 409447d commit a664d7b

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

.ci/scripts/test_llava.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ run_and_verify() {
154154
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
155155
else
156156
# Only check a short prefix (the prompt plus the first generated word) because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
157-
EXPECTED_PREFIX="ASSISTANT:"
157+
EXPECTED_PREFIX="ASSISTANT: image"
158158
fi
159159
if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
160160
echo "Expected result prefix: ${EXPECTED_PREFIX}"

examples/models/llava/CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# ~~~
1616
# It should also be cmake-lint clean.
1717
#
18-
cmake_minimum_required(VERSION 3.19)
18+
cmake_minimum_required(VERSION 3.24) # 3.24 is required for WHOLE_ARCHIVE
1919
project(llava)
2020

2121
# Duplicating options as root CMakeLists.txt
@@ -124,7 +124,7 @@ target_link_options_shared_lib(quantized_ops_lib)
124124
list(APPEND link_libraries quantized_kernels quantized_ops_lib)
125125

126126
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
127-
list(APPEND link_libraries custom_ops)
127+
list(APPEND link_libraries $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
128128
endif()
129129

130130
set(XNNPACK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack)

0 commit comments

Comments
 (0)