Skip to content

Commit 7c3be92

Browse files
committed
perf(graphics): use const generics for DWT
This seems to speed up the code a bit: rfxenc time: [46.040 µs 46.288 µs 46.698 µs] change: [-9.2580% -8.6663% -7.8304%] (p = 0.00 < 0.05) Performance has improved. Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
1 parent d6873fb commit 7c3be92

File tree

1 file changed

+23
-23
lines changed
  • crates/ironrdp-graphics/src

1 file changed

+23
-23
lines changed

crates/ironrdp-graphics/src/dwt.rs

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,30 @@
11
use ironrdp_pdu::utils::SplitTo as _;
22

33
pub fn encode(buffer: &mut [i16], temp_buffer: &mut [i16]) {
4-
encode_block(&mut *buffer, temp_buffer, 32);
5-
encode_block(&mut buffer[3072..], temp_buffer, 16);
6-
encode_block(&mut buffer[3840..], temp_buffer, 8);
4+
encode_block::<32>(&mut *buffer, temp_buffer);
5+
encode_block::<16>(&mut buffer[3072..], temp_buffer);
6+
encode_block::<8>(&mut buffer[3840..], temp_buffer);
77
}
88

9-
fn encode_block(buffer: &mut [i16], temp_buffer: &mut [i16], subband_width: usize) {
10-
dwt_vertical(buffer, temp_buffer, subband_width);
11-
dwt_horizontal(buffer, temp_buffer, subband_width);
9+
fn encode_block<const SUBBAND_WIDTH: usize>(buffer: &mut [i16], temp_buffer: &mut [i16]) {
10+
dwt_vertical::<SUBBAND_WIDTH>(buffer, temp_buffer);
11+
dwt_horizontal::<SUBBAND_WIDTH>(buffer, temp_buffer);
1212
}
1313

1414
// DWT in the vertical direction; produces 2 sub-bands, stored in L, H order in the temporary buffer `dwt`.
15-
fn dwt_vertical(buffer: &[i16], dwt: &mut [i16], subband_width: usize) {
16-
let total_width = subband_width * 2;
15+
fn dwt_vertical<const SUBBAND_WIDTH: usize>(buffer: &[i16], dwt: &mut [i16]) {
16+
let total_width = SUBBAND_WIDTH * 2;
1717

1818
for x in 0..total_width {
19-
for n in 0..subband_width {
19+
for n in 0..SUBBAND_WIDTH {
2020
let y = n * 2;
2121
let l_index = n * total_width + x;
22-
let h_index = l_index + subband_width * total_width;
22+
let h_index = l_index + SUBBAND_WIDTH * total_width;
2323
let src_index = y * total_width + x;
2424

2525
dwt[h_index] = ((i32::from(buffer[src_index + total_width])
2626
- ((i32::from(buffer[src_index])
27-
+ i32::from(buffer[src_index + if n < subband_width - 1 { 2 * total_width } else { 0 }]))
27+
+ i32::from(buffer[src_index + if n < SUBBAND_WIDTH - 1 { 2 * total_width } else { 0 }]))
2828
>> 1))
2929
>> 1) as i16;
3030
dwt[l_index] = (i32::from(buffer[src_index])
@@ -41,24 +41,24 @@ fn dwt_vertical(buffer: &[i16], dwt: &mut [i16], subband_width: usize) {
4141
// LL(3) order, stored in original buffer.
4242
// The lower part L generates LL(3) and HL(0).
4343
// The higher part H generates LH(1) and HH(2).
44-
fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) {
45-
let total_width = subband_width * 2;
46-
let squared_subband_width = subband_width.pow(2);
44+
fn dwt_horizontal<const SUBBAND_WIDTH: usize>(mut buffer: &mut [i16], dwt: &[i16]) {
45+
let total_width = SUBBAND_WIDTH * 2;
46+
let squared_subband_width = SUBBAND_WIDTH.pow(2);
4747

4848
let mut hl = buffer.split_to(squared_subband_width);
4949
let mut lh = buffer.split_to(squared_subband_width);
5050
let mut hh = buffer.split_to(squared_subband_width);
5151
let mut ll = buffer;
5252
let (mut l_src, mut h_src) = dwt.split_at(squared_subband_width * 2);
5353

54-
for _ in 0..subband_width {
54+
for _ in 0..SUBBAND_WIDTH {
5555
// L
56-
for n in 0..subband_width {
56+
for n in 0..SUBBAND_WIDTH {
5757
let x = n * 2;
5858

5959
// HL
6060
hl[n] = ((i32::from(l_src[x + 1])
61-
- ((i32::from(l_src[x]) + i32::from(l_src[if n < subband_width - 1 { x + 2 } else { x }])) >> 1))
61+
- ((i32::from(l_src[x]) + i32::from(l_src[if n < SUBBAND_WIDTH - 1 { x + 2 } else { x }])) >> 1))
6262
>> 1) as i16;
6363
// LL
6464
ll[n] = (i32::from(l_src[x])
@@ -70,12 +70,12 @@ fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) {
7070
}
7171

7272
// H
73-
for n in 0..subband_width {
73+
for n in 0..SUBBAND_WIDTH {
7474
let x = n * 2;
7575

7676
// HH
7777
hh[n] = ((i32::from(h_src[x + 1])
78-
- ((i32::from(h_src[x]) + i32::from(h_src[if n < subband_width - 1 { x + 2 } else { x }])) >> 1))
78+
- ((i32::from(h_src[x]) + i32::from(h_src[if n < SUBBAND_WIDTH - 1 { x + 2 } else { x }])) >> 1))
7979
>> 1) as i16;
8080
// LH
8181
lh[n] = (i32::from(h_src[x])
@@ -86,10 +86,10 @@ fn dwt_horizontal(mut buffer: &mut [i16], dwt: &[i16], subband_width: usize) {
8686
}) as i16;
8787
}
8888

89-
hl = &mut hl[subband_width..];
90-
lh = &mut lh[subband_width..];
91-
hh = &mut hh[subband_width..];
92-
ll = &mut ll[subband_width..];
89+
hl = &mut hl[SUBBAND_WIDTH..];
90+
lh = &mut lh[SUBBAND_WIDTH..];
91+
hh = &mut hh[SUBBAND_WIDTH..];
92+
ll = &mut ll[SUBBAND_WIDTH..];
9393

9494
l_src = &l_src[total_width..];
9595
h_src = &h_src[total_width..];

0 commit comments

Comments
 (0)