Skip to content

Commit 6a917fe

Browse files
committed
Fix downsampling
1 parent 9356549 commit 6a917fe

File tree

2 files changed

+11
-22
lines changed

2 files changed

+11
-22
lines changed

Libraries/MLXVLM/Models/Qwen25VL.swift

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -679,6 +679,8 @@ public class Qwen25VLProcessor: UserInputProcessor {
679679
minPixels: config.size.minPixels, maxPixels: config.size.maxPixels)
680680
let resizedSize = CGSize(width: resizedWidth, height: resizedHeight)
681681

682+
print("config.size.maxPixels: \(config.size.maxPixels)")
683+
682684
// Process images
683685
let processedImages =
684686
try images

Libraries/MLXVLM/Models/QwenVL.swift

Lines changed: 9 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,9 @@ public struct QwenVL {
121121
throws
122122
-> (Int, Int)
123123
{
124+
print("Original dimensions: \(width) × \(height)")
125+
print("Factor: \(factor), minPixels: \(minPixels), maxPixels: \(maxPixels)")
126+
124127
if height < factor {
125128
throw VLMError.imageProcessingFailure(
126129
"Height: \(height) must be larger than factor: \(factor)")
@@ -134,44 +137,28 @@ public struct QwenVL {
134137
"Absolute aspect ratio must be smaller than 200: \(width) × \(height)")
135138
}
136139

137-
// Maximum allowed dimension for any single side to prevent buffer overflows
138-
// This is important for portrait/landscape images with extreme aspect ratios
139-
let maxDimension = 224
140-
141140
var hBar = max(factor, Int(round(Float(height) / Float(factor))) * factor)
142141
var wBar = max(factor, Int(round(Float(width) / Float(factor))) * factor)
142+
print("After rounding to factor multiples: \(wBar) × \(hBar)")
143143

144-
// Start by scaling based on total pixel count
144+
// Scale based on total pixel count
145145
if hBar * wBar > maxPixels {
146146
let beta = sqrt(Float(height * width) / Float(maxPixels))
147147
hBar = Int(floor(Float(height) / beta / Float(factor))) * factor
148148
wBar = Int(floor(Float(width) / beta / Float(factor))) * factor
149+
print("After scaling down for maxPixels: \(wBar) × \(hBar)")
149150
} else if hBar * wBar < minPixels {
150151
let beta = sqrt(Float(minPixels) / Float(height * width))
151152
hBar = Int(ceil(Float(height) * beta / Float(factor))) * factor
152153
wBar = Int(ceil(Float(width) * beta / Float(factor))) * factor
153-
}
154-
155-
// Additionally check if either dimension exceeds the maximum allowed
156-
if hBar > maxDimension {
157-
// Calculate how much we need to scale down height
158-
let scale = Float(maxDimension) / Float(hBar)
159-
// Apply that scale to both dimensions to maintain aspect ratio
160-
hBar = Int(round(Float(hBar) * scale / Float(factor))) * factor
161-
wBar = Int(round(Float(wBar) * scale / Float(factor))) * factor
162-
}
163-
164-
if wBar > maxDimension {
165-
// Calculate how much we need to scale down width
166-
let scale = Float(maxDimension) / Float(wBar)
167-
// Apply that scale to both dimensions to maintain aspect ratio
168-
hBar = Int(round(Float(hBar) * scale / Float(factor))) * factor
169-
wBar = Int(round(Float(wBar) * scale / Float(factor))) * factor
154+
print("After scaling up for minPixels: \(wBar) × \(hBar)")
170155
}
171156

172157
// Ensure dimensions are divisible by the factor
173158
hBar = (hBar / factor) * factor
174159
wBar = (wBar / factor) * factor
160+
print("Final dimensions: \(wBar) × \(hBar)")
161+
print("Total pixels: \(wBar * hBar)")
175162

176163
// Final sanity check
177164
if hBar <= 0 || wBar <= 0 {

0 commit comments

Comments
 (0)