@@ -15,7 +15,7 @@ private let context = CIContext()
15
15
/// var image: CIImage
16
16
/// image = MediaProcessing.inSRGBToneCurveSpace(image)
17
17
///
18
- /// // apply user instructions
18
+ /// // Apply user instructions
19
19
/// image = MediaProcessing.apply(image, processing: processing)
20
20
///
21
21
/// image = MediaProcessing.resampleBicubic(image, to: config.size.cgSize)
@@ -59,6 +59,10 @@ public enum MediaProcessing {
59
59
}
60
60
61
61
/// Resample the image using bicubic interpolation.
62
+ /// - Parameters:
63
+ /// - image: The image to resample
64
+ /// - size: The target size
65
+ /// - Returns: The resampled image
62
66
static public func resampleBicubic( _ image: CIImage , to size: CGSize ) -> CIImage {
63
67
let filter = CIFilter . bicubicScaleTransform ( )
64
68
let extent = image. extent. size
@@ -70,19 +74,13 @@ public enum MediaProcessing {
70
74
let desiredAspectRatio = size. width / size. height
71
75
filter. aspectRatio = Float ( 1 / inputAspectRatio * desiredAspectRatio)
72
76
73
- // that image is now the aspect ratio of the target and the size
74
- // of the shorter dimension
75
- let scale : CGFloat
76
- if extent. width < extent. height {
77
- scale = size. width / extent. width
78
- } else {
79
- scale = size. height / extent. height
80
- }
77
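+ // Use one scale factor regardless of orientation: the smaller of the width and height ratios. NOTE(review): `scale` appears to set the output height while `aspectRatio` only adjusts the width, so when the width ratio is the smaller one the result may be shorter than `size.height` before cropping; confirm.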
78
+ let scale = min ( size. width / extent. width, size. height / extent. height)
81
79
filter. scale = Float ( scale)
82
80
83
81
let rescaled = filter. outputImage!
84
82
85
- // the image has a DoD larger than the requested size so crop
83
+ // The image has a domain of definition (DoD) larger than the requested size, so crop
86
84
// it to the desired size
87
85
return rescaled. cropped ( to: CGRect ( origin: . zero, size: size) )
88
86
}
@@ -94,7 +92,7 @@ public enum MediaProcessing {
94
92
let filter = CIFilter . colorMatrix ( )
95
93
filter. inputImage = image
96
94
97
- // this should match
95
+ // This should match
98
96
// https://pytorch.org/vision/main/generated/torchvision.transforms.Normalize.html
99
97
//
100
98
// output[channel] = (input[channel] - mean[channel]) / std[channel]
@@ -113,6 +111,10 @@ public enum MediaProcessing {
113
111
}
114
112
115
113
/// Convert the CIImage into a planar 3 channel MLXArray `[1, C, H, W]`
114
+ /// - Parameters:
115
+ /// - image: The image to convert
116
+ /// - colorSpace: Optional color space for rendering
117
+ /// - Returns: The MLXArray representation of the image
116
118
static public func asMLXArray( _ image: CIImage , colorSpace: CGColorSpace ? = nil ) -> MLXArray {
117
119
let size = image. extent. size
118
120
let w = Int ( size. width. rounded ( ) )
@@ -135,10 +137,10 @@ public enum MediaProcessing {
135
137
136
138
var array = MLXArray ( data, [ h, w, 4 ] , type: Float32 . self)
137
139
138
- // drop 4th channel
140
+ // Drop 4th channel
139
141
array = array [ 0 ... , 0 ... , ..< 3 ]
140
142
141
- // convert to 1, C, H, W
143
+ // Convert to 1, C, H, W
142
144
array = array. reshaped ( 1 , h, w, 3 ) . transposed ( 0 , 3 , 1 , 2 )
143
145
144
146
return array
0 commit comments