Skip to content

Commit ae7e1db

Browse files
committed
Add 4x4 and 8x8 IDCT
1 parent ce60d39 commit ae7e1db

File tree

2 files changed

+100
-8
lines changed

2 files changed

+100
-8
lines changed

src/idct.rs

Lines changed: 97 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,6 @@
22
// One example is tests/crashtest/images/imagetestsuite/b0b8914cc5f7a6eff409f16d8cc236c5.jpg
33
// That's why wrapping operators are needed.
44

5-
/// Produces a single output sample from the first coefficient of a block.
///
/// Only `coefficients[0]` and `quantization_table[0]` are read, and only
/// `output[0]` is written. The line stride is accepted for signature parity
/// with the other block sizes but is not used (hence the leading underscore).
///
/// # Panics
/// Panics if `output` is empty or `coefficients` is empty; the 64-element
/// length check on `coefficients` is active in debug builds only.
pub fn dequantize_and_idct_block_1x1(coefficients: &[i16], quantization_table: &[u16; 64], _output_linestride: usize, output: &mut [u8]) {
6-
debug_assert_eq!(coefficients.len(), 64);
7-
8-
// Dequantize, add a bias of 128 pre-multiplied by 8, then divide by 8.
// Note that `/` on i32 truncates toward zero; no rounding term is added here.
let s0 = (coefficients[0] as i32 * quantization_table[0] as i32).wrapping_add(128 * 8) / 8;
9-
// stbi_clamp narrows the i32 to u8 (presumably saturating to 0..=255 - confirm against its definition).
output[0] = stbi_clamp(s0);
10-
}
11-
125
// This is based on stb_image's 'stbi__idct_block'.
136
pub fn dequantize_and_idct_block_8x8(coefficients: &[i16], quantization_table: &[u16; 64], output_linestride: usize, output: &mut [u8]) {
147
debug_assert_eq!(coefficients.len(), 64);
@@ -162,6 +155,103 @@ pub fn dequantize_and_idct_block_8x8(coefficients: &[i16], quantization_table: &
162155
}
163156
}
164157

158+
// 4x4 and 2x2 IDCT based on Rakesh Dugad and Narendra Ahuja: "A Fast Scheme for Image Size Change in the Compressed Domain" (2001).
159+
// http://sylvana.net/jpegcrop/jidctred/
160+
/// Dequantizes the 4x4 corner of a coefficient block that starts at index 0
/// and writes a 4x4 tile of samples into `output`.
///
/// * `coefficients` - the block's coefficients, addressed as `column + 8 * row`;
///   only entries with `row < 4` and `column < 4` are read. The length is
///   checked to be 64 in debug builds only.
/// * `quantization_table` - multipliers applied element-wise, addressed with
///   the same index as `coefficients`.
/// * `output_linestride` - offset in `output` between the first samples of two
///   consecutive output rows.
/// * `output` - destination buffer; row `r` is written to
///   `output[r * output_linestride + 0 ..= r * output_linestride + 3]`.
///
/// All intermediate arithmetic uses wrapping operations, so out-of-range
/// coefficient data cannot trigger an overflow panic.
///
/// # Panics
/// Panics if `output` is shorter than `3 * output_linestride + 4`, or (in
/// release builds, where the debug assertion is compiled out) if
/// `coefficients` has fewer than 28 elements.
pub fn dequantize_and_idct_block_4x4(coefficients: &[i16], quantization_table: &[u16; 64], output_linestride: usize, output: &mut [u8]) {
161+
debug_assert_eq!(coefficients.len(), 64);
162+
// Results of the first pass, stored as `column + 4 * row`.
let mut temp = [0i32; 4*4];
163+
164+
// Fraction bits carried by the multiplier constants (the comment in the
// second pass states they scale by 1<<12; presumably this matches what
// stbi_f2f produces - confirm against its definition).
const CONST_BITS: u32 = 12;
165+
// Extra fraction bits kept in `temp` between the two passes.
const PASS1_BITS: u32 = 2;
166+
// Total right shift applied when producing the final samples.
const FINAL_BITS: u32 = CONST_BITS + PASS1_BITS + 3;
167+
168+
// First pass: process each of the four columns (i is the column index).
169+
for i in 0 .. 4 {
170+
// Dequantize rows 0..3 of column i.
let s0 = coefficients[i + 8*0] as i32 * quantization_table[i + 8*0] as i32;
171+
let s1 = coefficients[i + 8*1] as i32 * quantization_table[i + 8*1] as i32;
172+
let s2 = coefficients[i + 8*2] as i32 * quantization_table[i + 8*2] as i32;
173+
let s3 = coefficients[i + 8*3] as i32 * quantization_table[i + 8*3] as i32;
174+
175+
// Even-indexed inputs: sum and difference, scaled up by PASS1_BITS.
let x0 = s0.wrapping_add(s2).wrapping_shl(PASS1_BITS);
176+
let x2 = s0.wrapping_sub(s2).wrapping_shl(PASS1_BITS);
177+
178+
// Odd-indexed inputs: p1 is the product shared by t0 and t2.
// Adding 512 (half of 1 << (CONST_BITS - PASS1_BITS)) rounds the result
// before it is shifted down to the same PASS1_BITS scale as x0 / x2.
let p1 = s1.wrapping_add(s3).wrapping_mul(stbi_f2f(0.541196100));
179+
let t0 = p1.wrapping_add(s3.wrapping_mul(stbi_f2f(-1.847759065))).wrapping_add(512).wrapping_shr(CONST_BITS - PASS1_BITS);
180+
let t2 = p1.wrapping_add(s1.wrapping_mul(stbi_f2f( 0.765366865))).wrapping_add(512).wrapping_shr(CONST_BITS - PASS1_BITS);
181+
182+
// Store the four results of column i into rows 0, 3, 1 and 2 of temp.
temp[i + 4*0] = x0.wrapping_add(t2);
183+
temp[i + 4*3] = x0.wrapping_sub(t2);
184+
temp[i + 4*1] = x2.wrapping_add(t0);
185+
temp[i + 4*2] = x2.wrapping_sub(t0);
186+
}
187+
188+
// Second pass: process each of the four rows of temp (i is the row index).
for i in 0 .. 4 {
189+
let s0 = temp[i * 4 + 0];
190+
let s1 = temp[i * 4 + 1];
191+
let s2 = temp[i * 4 + 2];
192+
let s3 = temp[i * 4 + 3];
193+
194+
// Even-indexed inputs are brought up to the scale of the products below.
let x0 = s0.wrapping_add(s2).wrapping_shl(CONST_BITS);
195+
let x2 = s0.wrapping_sub(s2).wrapping_shl(CONST_BITS);
196+
197+
// Odd-indexed inputs, same constants as the first pass; no intermediate
// shift here because the final shift below removes all scaling at once.
let p1 = s1.wrapping_add(s3).wrapping_mul(stbi_f2f(0.541196100));
198+
let t0 = p1.wrapping_add(s3.wrapping_mul(stbi_f2f(-1.847759065)));
199+
let t2 = p1.wrapping_add(s1.wrapping_mul(stbi_f2f(0.765366865)));
200+
201+
// constants scaled things up by 1<<12, plus we had 1<<2 from first
202+
// loop, plus horizontal and vertical each scale by sqrt(8) so together
203+
// we've got an extra 1<<3, so 1<<17 total we need to remove.
204+
// so we want to round that, which means adding 0.5 * 1<<17,
205+
// aka 65536. Also, we'll end up with -128 to 127 that we want
206+
// to encode as 0..255 by adding 128, so we'll add that before the shift
207+
let x0 = x0.wrapping_add((1 << (FINAL_BITS - 1)) + (128 << FINAL_BITS));
208+
let x2 = x2.wrapping_add((1 << (FINAL_BITS - 1)) + (128 << FINAL_BITS));
209+
210+
// Write row i of the output tile; wrapping_shr on i32 is an arithmetic
// shift, and stbi_clamp narrows the result to u8 (presumably saturating
// to 0..=255 - confirm against its definition).
output[i * output_linestride + 0] = stbi_clamp(x0.wrapping_add(t2).wrapping_shr(FINAL_BITS));
211+
output[i * output_linestride + 3] = stbi_clamp(x0.wrapping_sub(t2).wrapping_shr(FINAL_BITS));
212+
output[i * output_linestride + 1] = stbi_clamp(x2.wrapping_add(t0).wrapping_shr(FINAL_BITS));
213+
output[i * output_linestride + 2] = stbi_clamp(x2.wrapping_sub(t0).wrapping_shr(FINAL_BITS));
214+
}
215+
}
216+
217+
/// Dequantizes the 2x2 corner of a coefficient block that starts at index 0
/// and writes a 2x2 tile of samples into `output`.
///
/// * `coefficients` - the block's coefficients; only indices 0, 1, 8 and 9
///   are read. The length is checked to be 64 in debug builds only.
/// * `quantization_table` - multipliers applied element-wise, addressed with
///   the same index as `coefficients`.
/// * `output_linestride` - offset in `output` from the first row to the second.
/// * `output` - destination buffer; writes go to indices 0, 1,
///   `output_linestride` and `output_linestride + 1`.
///
/// Sums and differences use wrapping arithmetic, so they cannot trigger an
/// overflow panic.
///
/// # Panics
/// Panics if `output` is shorter than `output_linestride + 2`, or (in release
/// builds, where the debug assertion is compiled out) if `coefficients` has
/// fewer than 10 elements.
pub fn dequantize_and_idct_block_2x2(coefficients: &[i16], quantization_table: &[u16; 64], output_linestride: usize, output: &mut [u8]) {
218+
debug_assert_eq!(coefficients.len(), 64);
219+
220+
// Number of bits removed by the final right shift (i.e. a division by 8).
const SCALE_BITS: u32 = 3;
221+
222+
// Column 0: dequantize rows 0 and 1.
223+
let s00 = coefficients[8*0] as i32 * quantization_table[8*0] as i32;
224+
let s10 = coefficients[8*1] as i32 * quantization_table[8*1] as i32;
225+
226+
// Sum feeds output row 0, difference feeds output row 1.
let x0 = s00.wrapping_add(s10);
227+
let x2 = s00.wrapping_sub(s10);
228+
229+
// Column 1: dequantize rows 0 and 1.
230+
let s01 = coefficients[8*0+1] as i32 * quantization_table[8*0+1] as i32;
231+
let s11 = coefficients[8*1+1] as i32 * quantization_table[8*1+1] as i32;
232+
233+
let x1 = s01.wrapping_add(s11);
234+
let x3 = s01.wrapping_sub(s11);
235+
236+
// Fold in the rounding term (half of 1 << SCALE_BITS) and a bias of 128
// pre-scaled by SCALE_BITS, so both are applied by the single shift below.
let x0 = x0.wrapping_add((1 << (SCALE_BITS-1)) + (128 << SCALE_BITS));
237+
let x2 = x2.wrapping_add((1 << (SCALE_BITS-1)) + (128 << SCALE_BITS));
238+
239+
// Row 0
240+
// stbi_clamp narrows the i32 to u8 (presumably saturating to 0..=255 - confirm against its definition).
output[0] = stbi_clamp(x0.wrapping_add(x1).wrapping_shr(SCALE_BITS));
241+
output[1] = stbi_clamp(x0.wrapping_sub(x1).wrapping_shr(SCALE_BITS));
242+
243+
// Row 1
244+
output[output_linestride + 0] = stbi_clamp(x2.wrapping_add(x3).wrapping_shr(SCALE_BITS));
245+
output[output_linestride + 1] = stbi_clamp(x2.wrapping_sub(x3).wrapping_shr(SCALE_BITS));
246+
}
247+
248+
/// Produces a single output sample from the first coefficient of a block.
///
/// Only `coefficients[0]` and `quantization_table[0]` are read, and only
/// `output[0]` is written. The line stride is accepted for signature parity
/// with the other block sizes but is not used (hence the leading underscore).
///
/// # Panics
/// Panics if `output` is empty or `coefficients` is empty; the 64-element
/// length check on `coefficients` is active in debug builds only.
pub fn dequantize_and_idct_block_1x1(coefficients: &[i16], quantization_table: &[u16; 64], _output_linestride: usize, output: &mut [u8]) {
249+
debug_assert_eq!(coefficients.len(), 64);
250+
251+
// Dequantize, add a bias of 128 pre-multiplied by 8, then divide by 8.
// Note that `/` on i32 truncates toward zero; no rounding term is added here.
let s0 = (coefficients[0] as i32 * quantization_table[0] as i32).wrapping_add(128 * 8) / 8;
252+
// stbi_clamp narrows the i32 to u8 (presumably saturating to 0..=255 - confirm against its definition).
output[0] = stbi_clamp(s0);
253+
}
254+
165255
// take a -128..127 value and stbi__clamp it and convert to 0..255
166256
fn stbi_clamp(x: i32) -> u8
167257
{

src/worker/immediate.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use decoder::MAX_COMPONENTS;
22
use error::Result;
3-
use idct::{ dequantize_and_idct_block_8x8, dequantize_and_idct_block_1x1 };
3+
use idct::{ dequantize_and_idct_block_8x8, dequantize_and_idct_block_4x4, dequantize_and_idct_block_2x2, dequantize_and_idct_block_1x1 };
44
use std::mem;
55
use std::sync::Arc;
66
use parser::Component;
@@ -49,6 +49,8 @@ impl ImmediateWorker {
4949

5050
match component.dct_scale {
5151
8 => dequantize_and_idct_block_8x8(coefficients, quantization_table, line_stride, output),
52+
4 => dequantize_and_idct_block_4x4(coefficients, quantization_table, line_stride, output),
53+
2 => dequantize_and_idct_block_2x2(coefficients, quantization_table, line_stride, output),
5254
1 => dequantize_and_idct_block_1x1(coefficients, quantization_table, line_stride, output),
5355
_ => unimplemented!(),
5456
}

0 commit comments

Comments
 (0)