@@ -121,6 +121,9 @@ public struct QwenVL {
121
121
throws
122
122
-> ( Int , Int )
123
123
{
124
+ print ( " Original dimensions: \( width) × \( height) " )
125
+ print ( " Factor: \( factor) , minPixels: \( minPixels) , maxPixels: \( maxPixels) " )
126
+
124
127
if height < factor {
125
128
throw VLMError . imageProcessingFailure (
126
129
" Height: \( height) must be larger than factor: \( factor) " )
@@ -134,44 +137,28 @@ public struct QwenVL {
134
137
" Absolute aspect ratio must be smaller than 200: \( width) × \( height) " )
135
138
}
136
139
137
- // Maximum allowed dimension for any single side to prevent buffer overflows
138
- // This is important for portrait/landscape images with extreme aspect ratios
139
- let maxDimension = 224
140
-
141
140
var hBar = max ( factor, Int ( round ( Float ( height) / Float( factor) ) ) * factor)
142
141
var wBar = max ( factor, Int ( round ( Float ( width) / Float( factor) ) ) * factor)
142
+ print ( " After rounding to factor multiples: \( wBar) × \( hBar) " )
143
143
144
- // Start by scaling based on total pixel count
144
+ // Scale based on total pixel count
145
145
if hBar * wBar > maxPixels {
146
146
let beta = sqrt ( Float ( height * width) / Float( maxPixels) )
147
147
hBar = Int ( floor ( Float ( height) / beta / Float( factor) ) ) * factor
148
148
wBar = Int ( floor ( Float ( width) / beta / Float( factor) ) ) * factor
149
+ print ( " After scaling down for maxPixels: \( wBar) × \( hBar) " )
149
150
} else if hBar * wBar < minPixels {
150
151
let beta = sqrt ( Float ( minPixels) / Float( height * width) )
151
152
hBar = Int ( ceil ( Float ( height) * beta / Float( factor) ) ) * factor
152
153
wBar = Int ( ceil ( Float ( width) * beta / Float( factor) ) ) * factor
153
- }
154
-
155
- // Additionally check if either dimension exceeds the maximum allowed
156
- if hBar > maxDimension {
157
- // Calculate how much we need to scale down height
158
- let scale = Float ( maxDimension) / Float( hBar)
159
- // Apply that scale to both dimensions to maintain aspect ratio
160
- hBar = Int ( round ( Float ( hBar) * scale / Float( factor) ) ) * factor
161
- wBar = Int ( round ( Float ( wBar) * scale / Float( factor) ) ) * factor
162
- }
163
-
164
- if wBar > maxDimension {
165
- // Calculate how much we need to scale down width
166
- let scale = Float ( maxDimension) / Float( wBar)
167
- // Apply that scale to both dimensions to maintain aspect ratio
168
- hBar = Int ( round ( Float ( hBar) * scale / Float( factor) ) ) * factor
169
- wBar = Int ( round ( Float ( wBar) * scale / Float( factor) ) ) * factor
154
+ print ( " After scaling up for minPixels: \( wBar) × \( hBar) " )
170
155
}
171
156
172
157
// Ensure dimensions are divisible by the factor
173
158
hBar = ( hBar / factor) * factor
174
159
wBar = ( wBar / factor) * factor
160
+ print ( " Final dimensions: \( wBar) × \( hBar) " )
161
+ print ( " Total pixels: \( wBar * hBar) " )
175
162
176
163
// Final sanity check
177
164
if hBar <= 0 || wBar <= 0 {
0 commit comments