llava CMakeLists: link custom ops with WHOLE_ARCHIVE (#10127)

lucylq · web-flow · commit a664d7b3616e · 2025-04-12T09:28:36.000-07:00
See issue: #10096

Copy the approach used in the llama CMakeLists.txt to link custom ops into the binary: https://github.com/pytorch/executorch/blob/409447d75a1524c1acc8f8ea894c2e13dd723a79/examples/models/llama/CMakeLists.txt#L114

Test plan:

Build:
```
cmake -DPYTHON_EXECUTABLE=python -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DCMAKE_PREFIX_PATH="/home/lfq/.conda/envs/executorch/lib/python3.10/site-packages" -Bcmake-out/examples/models/llava examples/models/llava
cmake --build cmake-out/examples/models/llava/ -j8 --config Debug
```

Run:
```
cmake-out/examples/models/llava/llava_main --model_path=llava.pte --tokenizer_path=tokenizer.bin --image_path=image.pt --prompt="ASSISTANT:" --temperature=0 --seq_len=650
I 00:00:00.001282 executorch:cpuinfo_utils.cpp:62] Reading file /sys/devices/soc0/image_version
I 00:00:00.001330 executorch:cpuinfo_utils.cpp:78] Failed to open midr file /sys/devices/soc0/image_version
I 00:00:00.001353 executorch:cpuinfo_utils.cpp:91] Reading file /sys/devices/system/cpu/cpu0/regs/identification/midr_el1
I 00:00:00.001380 executorch:cpuinfo_utils.cpp:100] Failed to open midr file /sys/devices/system/cpu/cpu0/regs/identification/midr_el1
I 00:00:00.001390 executorch:cpuinfo_utils.cpp:116] CPU info and manual query on # of cpus dont match.
I 00:00:00.001397 executorch:main.cpp:77] Resetting threadpool with num threads = 0
I 00:00:00.001412 executorch:multimodal_runner.h:45] Creating Multimodal LLM runner: model_path=llava.pte, tokenizer_path=tokenizer.bin
I 00:00:00.025122 executorch:main.cpp:107] image size(0): 3, size(1): 240, size(2): 336
I 00:00:21.793359 executorch:llava_runner.cpp:142] RSS after loading model: 6123.457031 MiB (0 if unsupported)
I 00:00:23.059576 executorch:text_prefiller.cpp:95] Prefill token result numel(): 32064
I 00:00:33.459186 executorch:llava_runner.cpp:166] RSS after prompt and image prefill: 6255.707031 MiB (0 if unsupported)
ASSISTANT:I 00:00:33.948606 executorch:text_prefiller.cpp:95] Prefill token result numel(): 32064
image captures a basketball game in progress, with several players on the court. One player is in the middle of a dunk, while another player is attempting toPyTorchObserver {"prompt_tokens":616,"generated_tokens":33,"model_load_start_ms":1744415212709,"model_load_end_ms":1744415234476,"inference_start_ms":1744415234477,"inference_end_ms":1744415259787,"prompt_eval_end_ms":1744415246632,"first_token_ms":1744415246632,"aggregate_sampling_time_ms":2883588,"SCALING_FACTOR_UNITS_PER_SECOND":1000}
I 00:00:47.103512 executorch:stats.h:104] Prompt Tokens: 616 Generated Tokens: 33
I 00:00:47.103520 executorch:stats.h:110] Model Load Time: 21.767000 (seconds)
I 00:00:47.103528 executorch:stats.h:117] Total inference time: 25.310000 (seconds) Rate: 1.303832 (tokens/second)
I 00:00:47.103533 executorch:stats.h:127] Prompt evaluation: 12.155000 (seconds) Rate: 50.678733 (tokens/second)
I 00:00:47.103538 executorch:stats.h:136] Generated 33 tokens: 13.155000 (seconds) Rate: 2.508552 (tokens/second)
I 00:00:47.103542 executorch:stats.h:147] Time to first generated token: 12.155000 (seconds)
I 00:00:47.103545 executorch:stats.h:153] Sampling time over 649 tokens: 2883.588000 (seconds)
I 00:00:47.103549 executorch:llava_runner.cpp:178] RSS after finishing text generation: 6255.707031 MiB (0 if unsupported)
```
diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
@@ -154,7 +154,7 @@ run_and_verify() {
         EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
     else
         # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
-        EXPECTED_PREFIX="ASSISTANT:"
+        EXPECTED_PREFIX="ASSISTANT: image"
     fi
     if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
         echo "Expected result prefix: ${EXPECTED_PREFIX}"
diff --git a/examples/models/llava/CMakeLists.txt b/examples/models/llava/CMakeLists.txt
@@ -15,7 +15,7 @@
 # ~~~
 # It should also be cmake-lint clean.
 #
-cmake_minimum_required(VERSION 3.19)
+cmake_minimum_required(VERSION 3.24)  # 3.24 is required for WHOLE_ARCHIVE
 project(llava)
 
 # Duplicating options as root CMakeLists.txt
@@ -124,7 +124,7 @@ target_link_options_shared_lib(quantized_ops_lib)
 list(APPEND link_libraries quantized_kernels quantized_ops_lib)
 
 if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
-  list(APPEND link_libraries custom_ops)
+  list(APPEND link_libraries $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
 endif()
 
 set(XNNPACK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack)