// One example is tests/crashtest/images/imagetestsuite/b0b8914cc5f7a6eff409f16d8cc236c5.jpg
// That's why wrapping operators are needed.
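// (Wrapping<i32> wraps on overflow instead of panicking in debug builds;
// e.g. Wrapping(i32::MAX) + Wrapping(1) == Wrapping(i32::MIN).)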
use crate::parser::Dimensions;
-use std::num::Wrapping;
+use std::{
+    convert::TryFrom,
+    num::Wrapping,
+};

pub(crate) fn choose_idct_size(full_size: Dimensions, requested_size: Dimensions) -> usize {
    fn scaled(len: u16, scale: usize) -> u16 { ((len as u32 * scale as u32 - 1) / 8 + 1) as u16 }
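    // (scaled(len, scale) is ceil(len * scale / 8): the dimension that results
    // when every 8x8 block is decoded with a scale-by-scale IDCT.)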
@@ -28,7 +31,7 @@ fn test_choose_idct_size() {
    assert_eq!(choose_idct_size(Dimensions { width: 5472, height: 3648 }, Dimensions { width: 685, height: 999 }), 2);
    assert_eq!(choose_idct_size(Dimensions { width: 5472, height: 3648 }, Dimensions { width: 1000, height: 1000 }), 2);
    assert_eq!(choose_idct_size(Dimensions { width: 5472, height: 3648 }, Dimensions { width: 1400, height: 1400 }), 4);
-
+
    assert_eq!(choose_idct_size(Dimensions { width: 5472, height: 3648 }, Dimensions { width: 5472, height: 3648 }), 8);
    assert_eq!(choose_idct_size(Dimensions { width: 5472, height: 3648 }, Dimensions { width: 16384, height: 16384 }), 8);
    assert_eq!(choose_idct_size(Dimensions { width: 1, height: 1 }, Dimensions { width: 65535, height: 65535 }), 8);
@@ -45,79 +48,74 @@ pub(crate) fn dequantize_and_idct_block(scale: usize, coefficients: &[i16], quan
    }
}

-// This is based on stb_image's 'stbi__idct_block'.
-fn dequantize_and_idct_block_8x8(coefficients: &[i16], quantization_table: &[u16; 64], output_linestride: usize, output: &mut [u8]) {
+pub fn dequantize_and_idct_block_8x8(
+    coefficients: &[i16],
+    quantization_table: &[u16; 64],
+    output_linestride: usize,
+    output: &mut [u8]
+) {
    debug_assert_eq!(coefficients.len(), 64);
+    let output = output
+        .chunks_mut(output_linestride);
+    dequantize_and_idct_block_8x8_inner(coefficients, quantization_table, output)
+}
+
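+// Each item yielded by `output` is one destination row; only its first 8 bytes are
+// written (the wrapper above feeds rows via `chunks_mut(output_linestride)`).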
+// This is based on stb_image's 'stbi__idct_block'.
+fn dequantize_and_idct_block_8x8_inner<'a, I>(
+    coefficients: &[i16],
+    quantization_table: &[u16; 64],
+    output: I,
+) where
+    I: IntoIterator<Item = &'a mut [u8]>,
+    I::IntoIter: ExactSizeIterator<Item = &'a mut [u8]>,
+{
+    let output = output.into_iter();
+    debug_assert!(
+        output.len() >= 8,
+        "Output iterator has the wrong length: {}",
+        output.len()
+    );

-    let mut temp = [Wrapping(0i32); 64];
+    let mut temp = [Wrapping(0); 64];

    // columns
-    for i in 0..8 {
-        // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
-        if coefficients[i + 8] == 0 && coefficients[i + 16] == 0 && coefficients[i + 24] == 0 &&
-            coefficients[i + 32] == 0 && coefficients[i + 40] == 0 && coefficients[i + 48] == 0 &&
-            coefficients[i + 56] == 0 {
-            let dcterm = Wrapping(coefficients[i] as i32 * quantization_table[i] as i32) << 2;
-            temp[i] = dcterm;
-            temp[i + 8] = dcterm;
+    for i in 0..8 {
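+        // If all the AC coefficients in this column are zero, take a shortcut --
+        // this avoids dequantizing 0s and IDCTing.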
+        if coefficients[i + 8] == 0
+            && coefficients[i + 16] == 0
+            && coefficients[i + 24] == 0
+            && coefficients[i + 32] == 0
+            && coefficients[i + 40] == 0
+            && coefficients[i + 48] == 0
+            && coefficients[i + 56] == 0
+        {
+            let dcterm = dequantize(coefficients[i], quantization_table[i]) << 2;
+            temp[i] = dcterm;
+            temp[i + 8] = dcterm;
            temp[i + 16] = dcterm;
            temp[i + 24] = dcterm;
            temp[i + 32] = dcterm;
            temp[i + 40] = dcterm;
            temp[i + 48] = dcterm;
            temp[i + 56] = dcterm;
-        }
-        else {
-            let s0 = Wrapping(coefficients[i] as i32 * quantization_table[i] as i32);
-            let s1 = Wrapping(coefficients[i + 8] as i32 * quantization_table[i + 8] as i32);
-            let s2 = Wrapping(coefficients[i + 16] as i32 * quantization_table[i + 16] as i32);
-            let s3 = Wrapping(coefficients[i + 24] as i32 * quantization_table[i + 24] as i32);
-            let s4 = Wrapping(coefficients[i + 32] as i32 * quantization_table[i + 32] as i32);
-            let s5 = Wrapping(coefficients[i + 40] as i32 * quantization_table[i + 40] as i32);
-            let s6 = Wrapping(coefficients[i + 48] as i32 * quantization_table[i + 48] as i32);
-            let s7 = Wrapping(coefficients[i + 56] as i32 * quantization_table[i + 56] as i32);
-
-            let p2 = s2;
-            let p3 = s6;
-            let p1 = (p2 + p3) * stbi_f2f(0.5411961);
-            let t2 = p1 + p3 * stbi_f2f(-1.847759065);
-            let t3 = p1 + p2 * stbi_f2f(0.765366865);
-            let p2 = s0;
-            let p3 = s4;
-            let t0 = stbi_fsh(p2 + p3);
-            let t1 = stbi_fsh(p2 - p3);
-            let x0 = t0 + t3;
-            let x3 = t0 - t3;
-            let x1 = t1 + t2;
-            let x2 = t1 - t2;
-            let t0 = s7;
-            let t1 = s5;
-            let t2 = s3;
-            let t3 = s1;
-            let p3 = t0 + t2;
-            let p4 = t1 + t3;
-            let p1 = t0 + t3;
-            let p2 = t1 + t2;
-            let p5 = (p3 + p4) * stbi_f2f(1.175875602);
-            let t0 = t0 * stbi_f2f(0.298631336);
-            let t1 = t1 * stbi_f2f(2.053119869);
-            let t2 = t2 * stbi_f2f(3.072711026);
-            let t3 = t3 * stbi_f2f(1.501321110);
-            let p1 = p5 + (p1 * stbi_f2f(-0.899976223));
-            let p2 = p5 + (p2 * stbi_f2f(-2.562915447));
-            let p3 = p3 * stbi_f2f(-1.961570560);
-            let p4 = p4 * stbi_f2f(-0.390180644);
-            let t3 = t3 + p1 + p4;
-            let t2 = t2 + p2 + p3;
-            let t1 = t1 + p2 + p4;
-            let t0 = t0 + p1 + p3;
-
-            // constants scaled things up by 1<<12; let's bring them back
-            // down, but keep 2 extra bits of precision
-            let x0 = x0 + Wrapping(512);
-            let x1 = x1 + Wrapping(512);
-            let x2 = x2 + Wrapping(512);
-            let x3 = x3 + Wrapping(512);
+        } else {
+            let s0 = dequantize(coefficients[i], quantization_table[i]);
+            let s1 = dequantize(coefficients[i + 8], quantization_table[i + 8]);
+            let s2 = dequantize(coefficients[i + 16], quantization_table[i + 16]);
+            let s3 = dequantize(coefficients[i + 24], quantization_table[i + 24]);
+            let s4 = dequantize(coefficients[i + 32], quantization_table[i + 32]);
+            let s5 = dequantize(coefficients[i + 40], quantization_table[i + 40]);
+            let s6 = dequantize(coefficients[i + 48], quantization_table[i + 48]);
+            let s7 = dequantize(coefficients[i + 56], quantization_table[i + 56]);
+
+            let Kernel {
+                xs: [x0, x1, x2, x3],
+                ts: [t0, t1, t2, t3],
+            } = kernel(
+                [s0, s1, s2, s3, s4, s5, s6, s7],
+                // constants scaled things up by 1<<12; let's bring them back
+                // down, but keep 2 extra bits of precision
+                512,
+            );
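+            // (512 == 1 << 9, so adding it ahead of the >> 10 below rounds to
+            // nearest while keeping the result scaled by those 2 extra bits.)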

            temp[i] = (x0 + t3) >> 10;
            temp[i + 56] = (x0 - t3) >> 10;
@@ -130,72 +128,126 @@ fn dequantize_and_idct_block_8x8(coefficients: &[i16], quantization_table: &[u16
        }
    }

-    for i in 0..8 {
-        // no fast case since the first 1D IDCT spread components out
-        let s0 = temp[i * 8];
-        let s1 = temp[i * 8 + 1];
-        let s2 = temp[i * 8 + 2];
-        let s3 = temp[i * 8 + 3];
-        let s4 = temp[i * 8 + 4];
-        let s5 = temp[i * 8 + 5];
-        let s6 = temp[i * 8 + 6];
-        let s7 = temp[i * 8 + 7];
-
-        let p2 = s2;
-        let p3 = s6;
-        let p1 = (p2 + p3) * stbi_f2f(0.5411961);
-        let t2 = p1 + p3 * stbi_f2f(-1.847759065);
-        let t3 = p1 + p2 * stbi_f2f(0.765366865);
-        let p2 = s0;
-        let p3 = s4;
-        let t0 = stbi_fsh(p2 + p3);
-        let t1 = stbi_fsh(p2 - p3);
-        let x0 = t0 + t3;
-        let x3 = t0 - t3;
-        let x1 = t1 + t2;
-        let x2 = t1 - t2;
-        let t0 = s7;
-        let t1 = s5;
-        let t2 = s3;
-        let t3 = s1;
-        let p3 = t0 + t2;
-        let p4 = t1 + t3;
-        let p1 = t0 + t3;
-        let p2 = t1 + t2;
-        let p5 = (p3 + p4) * stbi_f2f(1.175875602);
-        let t0 = t0 * stbi_f2f(0.298631336);
-        let t1 = t1 * stbi_f2f(2.053119869);
-        let t2 = t2 * stbi_f2f(3.072711026);
-        let t3 = t3 * stbi_f2f(1.501321110);
-        let p1 = p5 + p1 * stbi_f2f(-0.899976223);
-        let p2 = p5 + p2 * stbi_f2f(-2.562915447);
-        let p3 = p3 * stbi_f2f(-1.961570560);
-        let p4 = p4 * stbi_f2f(-0.390180644);
-        let t3 = t3 + p1 + p4;
-        let t2 = t2 + p2 + p3;
-        let t1 = t1 + p2 + p4;
-        let t0 = t0 + p1 + p3;
+    for (chunk, output_chunk) in temp.chunks_exact(8).zip(output) {
+        let chunk = <&[_; 8]>::try_from(chunk).unwrap();

        // constants scaled things up by 1<<12, plus we had 1<<2 from first
        // loop, plus horizontal and vertical each scale by sqrt(8) so together
        // we've got an extra 1<<3, so 1<<17 total we need to remove.
        // so we want to round that, which means adding 0.5 * 1<<17,
        // aka 65536. Also, we'll end up with -128 to 127 that we want
        // to encode as 0..255 by adding 128, so we'll add that before the shift
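        // (X_SCALE below bundles these: 65536 == 1 << 16 rounds the >> 17, and
        // 128 << 17 becomes the +128 bias after the shift.)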
-        let x0 = x0 + Wrapping(65536 + (128 << 17));
-        let x1 = x1 + Wrapping(65536 + (128 << 17));
-        let x2 = x2 + Wrapping(65536 + (128 << 17));
-        let x3 = x3 + Wrapping(65536 + (128 << 17));
-
-        output[i * output_linestride] = stbi_clamp((x0 + t3) >> 17);
-        output[i * output_linestride + 7] = stbi_clamp((x0 - t3) >> 17);
-        output[i * output_linestride + 1] = stbi_clamp((x1 + t2) >> 17);
-        output[i * output_linestride + 6] = stbi_clamp((x1 - t2) >> 17);
-        output[i * output_linestride + 2] = stbi_clamp((x2 + t1) >> 17);
-        output[i * output_linestride + 5] = stbi_clamp((x2 - t1) >> 17);
-        output[i * output_linestride + 3] = stbi_clamp((x3 + t0) >> 17);
-        output[i * output_linestride + 4] = stbi_clamp((x3 - t0) >> 17);
+        const X_SCALE: i32 = 65536 + (128 << 17);
+
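+        // If the seven AC terms of this row are all zero, every pixel in the row
+        // is just the clamped, scaled DC term.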
+        let [s0, rest @ ..] = chunk;
+        if *rest == [Wrapping(0); 7] {
+            let dcterm = stbi_clamp((stbi_fsh(*s0) + Wrapping(X_SCALE)) >> 17);
+            output_chunk[0] = dcterm;
+            output_chunk[1] = dcterm;
+            output_chunk[2] = dcterm;
+            output_chunk[3] = dcterm;
+            output_chunk[4] = dcterm;
+            output_chunk[5] = dcterm;
+            output_chunk[6] = dcterm;
+            output_chunk[7] = dcterm;
+        } else {
+            let Kernel {
+                xs: [x0, x1, x2, x3],
+                ts: [t0, t1, t2, t3],
+            } = kernel(*chunk, X_SCALE);
+
+            output_chunk[0] = stbi_clamp((x0 + t3) >> 17);
+            output_chunk[7] = stbi_clamp((x0 - t3) >> 17);
+            output_chunk[1] = stbi_clamp((x1 + t2) >> 17);
+            output_chunk[6] = stbi_clamp((x1 - t2) >> 17);
+            output_chunk[2] = stbi_clamp((x2 + t1) >> 17);
+            output_chunk[5] = stbi_clamp((x2 - t1) >> 17);
+            output_chunk[3] = stbi_clamp((x3 + t0) >> 17);
+            output_chunk[4] = stbi_clamp((x3 - t0) >> 17);
+        }
+    }
+}
+
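+// Intermediate values of one 8-point IDCT pass: output element j is formed as
+// xs[j] + ts[3 - j], and output element 7 - j as xs[j] - ts[3 - j].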
+struct Kernel {
+    xs: [Wrapping<i32>; 4],
+    ts: [Wrapping<i32>; 4],
+}
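+// `stbi_f2f` and `stbi_fsh` are fixed-point helpers defined elsewhere in this
+// crate, mirroring stb_image's stbi__f2f/stbi__fsh: roughly, stbi_f2f converts a
+// float constant to 12-bit fixed point (x * 4096, rounded) and stbi_fsh shifts a
+// value left by 12.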
+
+#[inline]
+fn kernel_x([s0, s2, s4, s6]: [Wrapping<i32>; 4], x_scale: i32) -> [Wrapping<i32>; 4] {
+    // Even `chunk` indices
+    let (t2, t3);
+    {
+        let p2 = s2;
+        let p3 = s6;
+
+        let p1 = (p2 + p3) * stbi_f2f(0.5411961);
+        t2 = p1 + p3 * stbi_f2f(-1.847759065);
+        t3 = p1 + p2 * stbi_f2f(0.765366865);
+    }
+
+    let (t0, t1);
+    {
+        let p2 = s0;
+        let p3 = s4;
+
+        t0 = stbi_fsh(p2 + p3);
+        t1 = stbi_fsh(p2 - p3);
    }
+
+    let x0 = t0 + t3;
+    let x3 = t0 - t3;
+    let x1 = t1 + t2;
+    let x2 = t1 - t2;
+
+    let x_scale = Wrapping(x_scale);
+
+    [x0 + x_scale, x1 + x_scale, x2 + x_scale, x3 + x_scale]
+}
+
+#[inline]
+fn kernel_t([s1, s3, s5, s7]: [Wrapping<i32>; 4]) -> [Wrapping<i32>; 4] {
+    // Odd `chunk` indices
+    let mut t0 = s7;
+    let mut t1 = s5;
+    let mut t2 = s3;
+    let mut t3 = s1;
+
+    let p3 = t0 + t2;
+    let p4 = t1 + t3;
+    let p1 = t0 + t3;
+    let p2 = t1 + t2;
+    let p5 = (p3 + p4) * stbi_f2f(1.175875602);
+
+    t0 *= stbi_f2f(0.298631336);
+    t1 *= stbi_f2f(2.053119869);
+    t2 *= stbi_f2f(3.072711026);
+    t3 *= stbi_f2f(1.501321110);
+
+    let p1 = p5 + p1 * stbi_f2f(-0.899976223);
+    let p2 = p5 + p2 * stbi_f2f(-2.562915447);
+    let p3 = p3 * stbi_f2f(-1.961570560);
+    let p4 = p4 * stbi_f2f(-0.390180644);
+
+    t3 += p1 + p4;
+    t2 += p2 + p3;
+    t1 += p2 + p4;
+    t0 += p1 + p3;
+
+    [t0, t1, t2, t3]
+}
+
+#[inline]
+fn kernel([s0, s1, s2, s3, s4, s5, s6, s7]: [Wrapping<i32>; 8], x_scale: i32) -> Kernel {
+    Kernel {
+        xs: kernel_x([s0, s2, s4, s6], x_scale),
+        ts: kernel_t([s1, s3, s5, s7]),
+    }
+}
+
+#[inline(always)]
+fn dequantize(c: i16, q: u16) -> Wrapping<i32> {
+    Wrapping(i32::from(c) * i32::from(q))
}

// 4x4 and 2x2 IDCT based on Rakesh Dugad and Narendra Ahuja: "A Fast Scheme for Image Size Change in the Compressed Domain" (2001).