@@ -395,58 +395,22 @@ class VLMEvaluator {
395
395
MLXRandom . seed ( UInt64 ( Date . timeIntervalSinceReferenceDate * 1000 ) )
396
396
397
397
try await modelContainer. perform { ( context: ModelContext ) -> Void in
398
-
399
- let images : [ UserInput . Image ] =
400
- if let image {
401
- [ UserInput . Image. ciImage ( image) ]
402
- } else {
403
- [ ]
404
- }
405
- let videos : [ UserInput . Video ] =
406
- if let videoURL {
407
- [ . url( videoURL) ]
408
- } else {
409
- [ ]
410
- }
411
- let messages : [ [ String : Any ] ] =
412
- if !images. isEmpty || !videos. isEmpty {
413
- [
414
- [
415
- " role " : " system " ,
416
- " content " : [
417
- [
418
- " type " : " text " ,
419
- " text " : videoURL != nil
420
- ? videoSystemPrompt : imageSystemPrompt,
421
- ]
422
- ] ,
423
- ] ,
424
- [
425
- " role " : " user " ,
426
- " content " : [
427
- [
428
- " type " : " text " ,
429
- " text " : prompt,
430
- ]
431
- ]
432
- // Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
433
- + images. map { _ in
434
- [ " type " : " image " ]
435
- }
436
- + videos. map { _ in
437
- [ " type " : " video " ]
438
- } ,
439
- ] ,
440
- ]
441
- } else {
442
- [
443
- [
444
- " role " : " user " ,
445
- " content " : prompt,
446
- ]
447
- ]
448
- }
449
- var userInput = UserInput ( messages: messages, images: images, videos: videos)
398
+ let images : [ UserInput . Image ] = if let image { [ . ciImage( image) ] } else { [ ] }
399
+ let videos : [ UserInput . Video ] = if let videoURL { [ . url( videoURL) ] } else { [ ] }
400
+
401
+ let systemPrompt =
402
+ if !videos. isEmpty {
403
+ videoSystemPrompt
404
+ } else if !images. isEmpty {
405
+ imageSystemPrompt
406
+ } else { " You are a helpful assistant. " }
407
+
408
+ let chat : [ Chat . Message ] = [
409
+ . system( systemPrompt) ,
410
+ . user( prompt, images: images, videos: videos) ,
411
+ ]
412
+
413
+ var userInput = UserInput ( chat: chat)
450
414
userInput. processing. resize = . init( width: 448 , height: 448 )
451
415
452
416
let lmInput = try await context. processor. prepare ( input: userInput)
0 commit comments