Skip to content

Commit 79245f7

Browse files
DarkLight1337 authored and mzusman committed
[Bugfix] Fix LLaVA-NeXT feature size precision error (for real) (vllm-project#11772)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 1d48e92 commit 79245f7

File tree

4 files changed

+47
-45
lines changed

4 files changed

+47
-45
lines changed

tests/models/decoder_only/vision_language/processing/test_llava_next.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ def processor_for_llava_next():
1717

1818
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
1919
@pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
20-
(488, 183), (198, 176), (176, 198)])
20+
(488, 183), (198, 176), (176, 198),
21+
(161, 184), (184, 161)])
2122
@pytest.mark.parametrize("num_imgs", [1, 2])
2223
def test_processor_prompt_replacements(
2324
processor_for_llava_next,

tests/models/decoder_only/vision_language/processing/test_llava_onevision.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@ def processor_for_llava_onevision():
1818
@pytest.mark.parametrize("model_id",
1919
["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
2020
@pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
21-
(488, 183), (198, 176), (176, 198)])
21+
(488, 183), (198, 176), (176, 198),
22+
(161, 184), (184, 161)])
2223
@pytest.mark.parametrize("num_imgs", [1, 2])
2324
def test_processor_prompt_replacements(
2425
processor_for_llava_onevision,

vllm/model_executor/models/llava_next.py

Lines changed: 19 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -121,30 +121,29 @@ def _get_num_unpadded_features(
121121
num_patch_height: int,
122122
num_patch_width: int,
123123
) -> tuple[int, int]:
124-
current_height = npatches * num_patch_height
125-
current_width = npatches * num_patch_width
126-
127124
# NOTE: Use float32 to remain consistent with HF output
128-
original_aspect_ratio = np.array(original_width / original_height,
129-
dtype=np.float32)
130-
current_aspect_ratio = np.array(current_width / current_height,
131-
dtype=np.float32)
125+
current_height_f = np.float32(npatches * num_patch_height)
126+
current_width_f = np.float32(npatches * num_patch_width)
127+
128+
original_width_f = np.float32(original_width)
129+
original_height_f = np.float32(original_height)
130+
131+
original_aspect_ratio = original_width_f / original_height_f
132+
current_aspect_ratio = current_width_f / current_height_f
132133

133134
if original_aspect_ratio > current_aspect_ratio:
134-
scale_factor = np.array(current_width / original_width,
135-
dtype=np.float32)
136-
new_height = int(original_height * scale_factor)
137-
padding = (current_height - new_height) // 2
138-
current_height -= 2 * padding
135+
scale_factor = current_width_f / original_width_f
136+
new_height = int(original_height_f * scale_factor)
137+
padding = (current_height_f - new_height) // 2
138+
current_height_f -= 2 * padding
139139
else:
140-
scale_factor = np.array(current_height / original_height,
141-
dtype=np.float32)
142-
new_width = int(original_width * scale_factor)
143-
padding = (current_width - new_width) // 2
144-
current_width -= 2 * padding
145-
146-
unpadded_features = current_height * current_width
147-
newline_features = current_height
140+
scale_factor = current_height_f / original_height_f
141+
new_width = int(original_width_f * scale_factor)
142+
padding = (current_width_f - new_width) // 2
143+
current_width_f -= 2 * padding
144+
145+
unpadded_features = int(current_height_f * current_width_f)
146+
newline_features = int(current_height_f)
148147

149148
return (unpadded_features, newline_features)
150149

vllm/model_executor/models/llava_onevision.py

Lines changed: 24 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -107,36 +107,37 @@ def _get_num_unpadded_features(
107107
num_patch_height: int,
108108
num_patch_width: int,
109109
) -> tuple[int, int]:
110-
current_height = npatches * num_patch_height
111-
current_width = npatches * num_patch_width
112-
113110
# NOTE: Use float32 to remain consistent with HF output
114-
original_aspect_ratio = np.array(original_width / original_height,
115-
dtype=np.float32)
116-
current_aspect_ratio = np.array(current_width / current_height,
117-
dtype=np.float32)
111+
current_height_f = np.float32(npatches * num_patch_height)
112+
current_width_f = np.float32(npatches * num_patch_width)
113+
114+
original_width_f = np.float32(original_width)
115+
original_height_f = np.float32(original_height)
116+
117+
original_aspect_ratio = original_width_f / original_height_f
118+
current_aspect_ratio = current_width_f / current_height_f
118119

119120
if original_aspect_ratio > current_aspect_ratio:
120-
scale_factor = np.array(current_width / original_width,
121-
dtype=np.float32)
122-
new_height = int(original_height * scale_factor)
123-
padding = (current_height - new_height) // 2
124-
current_height -= 2 * padding
121+
scale_factor = current_width_f / original_width_f
122+
new_height = int(original_height_f * scale_factor)
123+
padding = (current_height_f - new_height) // 2
124+
current_height_f -= 2 * padding
125125
else:
126-
scale_factor = np.array(current_height / original_height,
127-
dtype=np.float32)
128-
new_width = int(original_width * scale_factor)
129-
padding = (current_width - new_width) // 2
130-
current_width -= 2 * padding
126+
scale_factor = current_height_f / original_height_f
127+
new_width = int(original_width_f * scale_factor)
128+
padding = (current_width_f - new_width) // 2
129+
current_width_f -= 2 * padding
131130

132-
unpadded_features = current_height * current_width
133-
newline_features = current_height
131+
unpadded_features = int(current_height_f * current_width_f)
132+
newline_features = int(current_height_f)
134133

135-
ratio = math.sqrt(current_height * current_width / (9 * npatches**2))
134+
ratio = math.sqrt(current_height_f * current_width_f /
135+
(9 * npatches**2))
136136
if ratio > 1.1:
137-
unpadded_features = int(current_height // ratio) * int(
138-
current_width // ratio)
139-
newline_features = int(current_height // ratio)
137+
height_factor = int(current_height_f // ratio)
138+
width_factor = int(current_width_f // ratio)
139+
unpadded_features = height_factor * width_factor
140+
newline_features = height_factor
140141

141142
return (unpadded_features, newline_features)
142143

0 commit comments

Comments
 (0)