@@ -123,20 +123,25 @@ public struct QwenVL {
123
123
{
124
124
if height < factor {
125
125
throw VLMError . imageProcessingFailure (
126
- " height : \( height) must be larger than factor: \( factor) " )
126
+ " Height : \( height) must be larger than factor: \( factor) " )
127
127
}
128
128
if width < factor {
129
129
throw VLMError . imageProcessingFailure (
130
- " width : \( width) must be larger than factor: \( factor) " )
130
+ " Width : \( width) must be larger than factor: \( factor) " )
131
131
}
132
132
if max ( height, width) / min( height, width) > 200 {
133
133
throw VLMError . imageProcessingFailure (
134
- " absolute aspect ratio must be smaller than 200: \( width) x \( height) " )
134
+ " Absolute aspect ratio must be smaller than 200: \( width) × \( height) " )
135
135
}
136
136
137
+ // Maximum allowed dimension for any single side to prevent buffer overflows
138
+ // This is important for portrait/landscape images with extreme aspect ratios
139
+ let maxDimension = 224
140
+
137
141
var hBar = max ( factor, Int ( round ( Float ( height) / Float( factor) ) ) * factor)
138
142
var wBar = max ( factor, Int ( round ( Float ( width) / Float( factor) ) ) * factor)
139
143
144
+ // Start by scaling based on total pixel count
140
145
if hBar * wBar > maxPixels {
141
146
let beta = sqrt ( Float ( height * width) / Float( maxPixels) )
142
147
hBar = Int ( floor ( Float ( height) / beta / Float( factor) ) ) * factor
@@ -146,6 +151,24 @@ public struct QwenVL {
146
151
hBar = Int ( ceil ( Float ( height) * beta / Float( factor) ) ) * factor
147
152
wBar = Int ( ceil ( Float ( width) * beta / Float( factor) ) ) * factor
148
153
}
154
+
155
+ // Additionally check if either dimension exceeds the maximum allowed
156
+ if hBar > maxDimension {
157
+ // Calculate how much we need to scale down height
158
+ let scale = Float ( maxDimension) / Float( hBar)
159
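+ // Apply that scale to both dimensions; snapping to a multiple of factor only approximates the aspect ratio. NOTE(review): no max(factor, ...) floor here, so wBar rounds to 0 when wBar * scale < factor / 2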
160
+ hBar = Int ( round ( Float ( hBar) * scale / Float( factor) ) ) * factor
161
+ wBar = Int ( round ( Float ( wBar) * scale / Float( factor) ) ) * factor
162
+ }
163
+
164
+ if wBar > maxDimension {
165
+ // Calculate how much we need to scale down width
166
+ let scale = Float ( maxDimension) / Float( wBar)
167
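+ // Apply that scale to both dimensions; snapping to a multiple of factor only approximates the aspect ratio. NOTE(review): no max(factor, ...) floor here, so hBar rounds to 0 when hBar * scale < factor / 2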
168
+ hBar = Int ( round ( Float ( hBar) * scale / Float( factor) ) ) * factor
169
+ wBar = Int ( round ( Float ( wBar) * scale / Float( factor) ) ) * factor
170
+ }
171
+
149
172
return ( hBar, wBar)
150
173
}
151
174
0 commit comments