1
1
// Copyright 2024 Apple Inc.
2
2
3
+ import AVKit
3
4
import CoreImage
4
5
import MLX
5
6
import MLXLMCommon
@@ -19,12 +20,28 @@ struct ContentView: View {
19
20
@State var llm = VLMEvaluator ( )
20
21
@Environment ( DeviceStat . self) private var deviceStat
21
22
22
- @State private var selectedImage : PlatformImage ? = nil
23
+ @State private var selectedImage : PlatformImage ? = nil {
24
+ didSet {
25
+ if selectedImage != nil {
26
+ selectedVideoURL = nil
27
+ player = nil
28
+ }
29
+ }
30
+ }
31
+ @State private var selectedVideoURL : URL ? = nil {
32
+ didSet {
33
+ if let selectedVideoURL {
34
+ player = AVPlayer ( url: selectedVideoURL)
35
+ selectedImage = nil
36
+ }
37
+ }
38
+ }
23
39
@State private var showingImagePicker = false
24
40
@State private var selectedItem : PhotosPickerItem ? = nil
41
+ @State private var player : AVPlayer ? = nil
25
42
26
43
private var currentImageURL : URL ? {
27
- selectedImage == nil
44
+ selectedImage == nil && selectedVideoURL == nil
28
45
? URL (
29
46
string:
30
47
" https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg "
@@ -74,40 +91,60 @@ struct ContentView: View {
74
91
EmptyView ( )
75
92
}
76
93
}
94
+ } else if let player {
95
+ VideoPlayer ( player: player)
96
+ . scaledToFit ( )
97
+ . frame ( maxHeight: 300 )
98
+ . cornerRadius ( 12 )
77
99
}
78
100
79
101
HStack {
80
102
#if os(iOS)
81
103
PhotosPicker (
82
104
selection: $selectedItem,
83
- matching: . images
105
+ matching: PHPickerFilter . any ( of: [
106
+ PHPickerFilter . images, PHPickerFilter . videos,
107
+ ] )
84
108
) {
85
- Label ( " Select Image " , systemImage: " photo.badge.plus " )
109
+ Label ( " Select Image/Video " , systemImage: " photo.badge.plus " )
86
110
}
87
111
. onChange ( of: selectedItem) {
88
112
Task {
89
- if let data = try ? await selectedItem? . loadTransferable (
113
+ if let video = try ? await selectedItem? . loadTransferable (
114
+ type: TransferableVideo . self)
115
+ {
116
+ selectedVideoURL = video. url
117
+ } else if let data = try ? await selectedItem? . loadTransferable (
90
118
type: Data . self)
91
119
{
92
120
selectedImage = PlatformImage ( data: data)
93
121
}
94
122
}
95
123
}
96
124
#else
97
- Button ( " Select Image " ) {
125
+ Button( " Select Image/Video " ) {
98
126
showingImagePicker = true
99
127
}
100
128
. fileImporter (
101
129
isPresented: $showingImagePicker,
102
- allowedContentTypes: [ . image]
130
+ allowedContentTypes: [ . image, . movie ]
103
131
) { result in
104
132
switch result {
105
133
case . success( let file) :
106
134
Task { @MainActor in
107
135
do {
108
- let data = try loadImage ( from: file)
136
+ let data = try loadData ( from: file)
109
137
if let image = PlatformImage ( data: data) {
110
138
selectedImage = image
139
+ } else if let fileType = UTType (
140
+ filenameExtension: file. pathExtension) ,
141
+ fileType. conforms ( to: . movie)
142
+ {
143
+ if let sandboxURL = try ? loadVideoToSandbox (
144
+ from: file)
145
+ {
146
+ selectedVideoURL = sandboxURL
147
+ }
111
148
} else {
112
149
print ( " Failed to create image from data " )
113
150
}
@@ -214,30 +251,34 @@ struct ContentView: View {
214
251
if let selectedImage = selectedImage {
215
252
#if os(iOS)
216
253
let ciImage = CIImage ( image: selectedImage)
217
- await llm. generate ( prompt: prompt, image: ciImage ?? CIImage ( ) )
254
+ await llm. generate ( prompt: prompt, image: ciImage ?? CIImage ( ) , videoURL : nil )
218
255
#else
219
256
if let cgImage = selectedImage. cgImage (
220
257
forProposedRect: nil , context: nil , hints: nil )
221
258
{
222
259
let ciImage = CIImage ( cgImage: cgImage)
223
- await llm. generate ( prompt: prompt, image: ciImage)
260
+ await llm. generate ( prompt: prompt, image: ciImage, videoURL : nil )
224
261
}
225
262
#endif
226
263
} else if let imageURL = currentImageURL {
227
264
do {
228
265
let ( data, _) = try await URLSession . shared. data ( from: imageURL)
229
266
if let ciImage = CIImage ( data: data) {
230
- await llm. generate ( prompt: prompt, image: ciImage)
267
+ await llm. generate ( prompt: prompt, image: ciImage, videoURL : nil )
231
268
}
232
269
} catch {
233
270
print ( " Failed to load image: \( error. localizedDescription) " )
234
271
}
272
+ } else {
273
+ if let videoURL = selectedVideoURL {
274
+ await llm. generate ( prompt: prompt, image: nil , videoURL: videoURL)
275
+ }
235
276
}
236
277
}
237
278
}
238
279
239
280
#if os(macOS)
240
- private func loadImage ( from url: URL ) throws -> Data {
281
+ private func loadData ( from url: URL) throws -> Data {
241
282
guard url. startAccessingSecurityScopedResource ( ) else {
242
283
throw NSError (
243
284
domain: " FileAccess " , code: - 1 ,
@@ -246,6 +287,17 @@ struct ContentView: View {
246
287
defer { url. stopAccessingSecurityScopedResource ( ) }
247
288
return try Data ( contentsOf: url)
248
289
}
290
+
291
/// Copies a user-selected video into the temporary directory.
///
/// Security-scoped access to `url` is requested first and released again
/// when the function exits, whether it returns or throws.
///
/// - Parameter url: The file URL to copy from.
/// - Returns: The URL produced by `SandboxFileTransfer.transferFileToTemp(from:)`.
/// - Throws: An `NSError` in the `FileAccess` domain (code -1) when
///   security-scoped access is refused, or any error thrown by the transfer.
private func loadVideoToSandbox(from url: URL) throws -> URL {
    let accessGranted = url.startAccessingSecurityScopedResource()
    if !accessGranted {
        throw NSError(
            domain: "FileAccess",
            code: -1,
            userInfo: [NSLocalizedDescriptionKey: "Failed to access the file."])
    }
    // Balance the successful start call on every exit path.
    defer { url.stopAccessingSecurityScopedResource() }

    return try SandboxFileTransfer.transferFileToTemp(from: url)
}
249
301
#endif
250
302
251
303
private func copyToClipboard( _ string: String) {
@@ -318,7 +370,7 @@ class VLMEvaluator {
318
370
}
319
371
}
320
372
321
- func generate( prompt: String , image: CIImage ) async {
373
+ func generate( prompt: String , image: CIImage ? , videoURL : URL ? ) async {
322
374
guard !running else { return }
323
375
324
376
running = true
@@ -331,7 +383,9 @@ class VLMEvaluator {
331
383
MLXRandom . seed ( UInt64 ( Date . timeIntervalSinceReferenceDate * 1000 ) )
332
384
333
385
let result = try await modelContainer. perform { context in
334
- var userInput = UserInput ( prompt: prompt, images: [ . ciImage( image) ] )
386
+ let images : [ UserInput . Image ] = image != nil ? [ . ciImage( image!) ] : [ ]
387
+ let videos : [ UserInput . Video ] = videoURL != nil ? [ . url( videoURL!) ] : [ ]
388
+ var userInput = UserInput ( prompt: prompt, images: images, videos: videos)
335
389
userInput. processing. resize = . init( width: 448 , height: 448 )
336
390
337
391
let input = try await context. processor. prepare ( input: userInput)
@@ -370,3 +424,32 @@ class VLMEvaluator {
370
424
running = false
371
425
}
372
426
}
427
+
428
+ #if os(iOS)
429
/// A video exchanged through the transfer system, identified by a file URL.
struct TransferableVideo: Transferable {
    /// Location of the video file.
    let url: URL

    /// File-based representation for `.movie` content.
    ///
    /// Exporting hands out the stored `url`; importing copies the received
    /// file via `SandboxFileTransfer.transferFileToTemp(from:)` and wraps the
    /// resulting URL.
    static var transferRepresentation: some TransferRepresentation {
        FileRepresentation(
            contentType: .movie,
            exporting: { video in
                SentTransferredFile(video.url)
            },
            importing: { incoming in
                // NOTE(review): presumably the received file is only valid
                // for a short time, hence the immediate copy — confirm.
                let localCopy = try SandboxFileTransfer.transferFileToTemp(from: incoming.file)
                return TransferableVideo(url: localCopy)
            }
        )
    }
}
441
+ #endif
442
+
443
/// Helper for copying files into the temporary directory.
struct SandboxFileTransfer {
    /// Copies the file at `sourceURL` into `FileManager.default.temporaryDirectory`,
    /// keeping its last path component as the file name.
    ///
    /// A file already present at the destination is replaced. If `sourceURL`
    /// already refers to the destination file, nothing is copied and that URL
    /// is returned unchanged.
    ///
    /// - Parameter sourceURL: The file to copy.
    /// - Returns: The URL of the copy inside the temporary directory.
    /// - Throws: Any error raised by `FileManager` while removing the old copy
    ///   or copying the new one.
    static func transferFileToTemp(from sourceURL: URL) throws -> URL {
        let fileManager = FileManager.default
        let sandboxURL = fileManager.temporaryDirectory
            .appendingPathComponent(sourceURL.lastPathComponent)

        // If the source already is the destination, removing the "old" copy
        // would delete the only copy and the subsequent copy would fail.
        let sourcePath = sourceURL.standardizedFileURL.path(percentEncoded: false)
        let destinationPath = sandboxURL.standardizedFileURL.path(percentEncoded: false)
        if sourcePath == destinationPath {
            return sandboxURL
        }

        // `path()` percent-encodes by default, so names containing spaces or
        // non-ASCII characters would never match an existing file; the
        // file-system check needs the decoded path.
        if fileManager.fileExists(atPath: destinationPath) {
            try fileManager.removeItem(at: sandboxURL)
        }

        try fileManager.copyItem(at: sourceURL, to: sandboxURL)
        return sandboxURL
    }
}
0 commit comments