@@ -20,7 +20,9 @@ struct ImmediateWorker {
20
20
quantization_tables : [ Option < Arc < [ u16 ; 64 ] > > ; MAX_COMPONENTS ] ,
21
21
}
22
22
23
+ #[ derive( Clone , Copy ) ]
23
24
struct ComponentMetadata {
25
+ block_width : usize ,
24
26
block_count : usize ,
25
27
line_stride : usize ,
26
28
dct_scale : usize ,
@@ -53,38 +55,39 @@ impl ImmediateWorker {
53
55
core:: mem:: take ( & mut self . results [ index] )
54
56
}
55
57
56
// Diff view of `component_metadata` (`-` = removed, `+` = added, bare numbers = old/new line numbers).
// Gathers the layout values for component `index` into a `ComponentMetadata`:
//   block_width = block_size.width
//   block_count = block_size.width * vertical_sampling_factor
//   line_stride = block_size.width * dct_scale
//   dct_scale   = component.dct_scale
// The added version returns `Option<ComponentMetadata>`: an empty `self.components[index]` now
// yields `None` via `?`, where the removed version called `unwrap()` and would panic.
- pub fn component_metadata ( & self , index : usize ) -> ComponentMetadata {
57
- let component = self . components [ index] . as_ref ( ) . unwrap ( ) ;
58
+ pub fn component_metadata ( & self , index : usize ) -> Option < ComponentMetadata > {
59
+ let component = self . components [ index] . as_ref ( ) ? ;
58
60
let block_size = component. block_size ;
61
// New field: the block width is carried in the metadata so that `append_row_locked` can compute
// block coordinates from `block_width` instead of reading `block_size` from the worker.
+ let block_width = block_size. width as usize ;
59
62
let block_count = block_size. width as usize * component. vertical_sampling_factor as usize ;
60
63
let line_stride = block_size. width as usize * component. dct_scale ;
61
64
let dct_scale = component. dct_scale ;
62
65
63
- ComponentMetadata {
66
+ Some ( ComponentMetadata {
67
+ block_width,
64
68
block_count,
65
69
line_stride,
66
70
dct_scale,
67
- }
71
+ } )
68
72
}
69
73
70
74
pub fn append_row_locked (
71
- mutex : & Mutex < ImmediateWorker > ,
75
+ quantization_table : Arc < [ u16 ; 64 ] > ,
76
+ metadata : ComponentMetadata ,
72
77
( index, data) : ( usize , Vec < i16 > ) ,
73
78
result_offset : usize ,
79
+ result_block : & mut [ u8 ] ,
74
80
) {
75
81
// Convert coefficients from a MCU row to samples.
76
- let quantization_table;
77
82
let block_count;
78
83
let line_stride;
79
- let block_size ;
84
+ let block_width ;
80
85
let dct_scale;
81
86
82
87
{
83
- let inner = mutex. lock ( ) . unwrap ( ) ;
84
- quantization_table = inner. quantization_tables [ index] . as_ref ( ) . unwrap ( ) . clone ( ) ;
85
- block_size = inner. components [ index] . as_ref ( ) . unwrap ( ) . block_size ;
86
- let metadata = inner. component_metadata ( index) ;
88
+ let metadata = metadata;
87
89
90
+ block_width = metadata. block_width ;
88
91
block_count = metadata. block_count ;
89
92
line_stride = metadata. line_stride ;
90
93
dct_scale = metadata. dct_scale ;
@@ -94,8 +97,8 @@ impl ImmediateWorker {
94
97
95
98
let mut output_buffer = [ 0 ; 64 ] ;
96
99
for i in 0 ..block_count {
97
- let x = ( i % block_size . width as usize ) * dct_scale;
98
- let y = ( i / block_size . width as usize ) * dct_scale;
100
+ let x = ( i % block_width ) * dct_scale;
101
+ let y = ( i / block_width ) * dct_scale;
99
102
100
103
let coefficients: & [ i16 ; 64 ] = & data[ i * 64 ..( i + 1 ) * 64 ] . try_into ( ) . unwrap ( ) ;
101
104
@@ -111,8 +114,7 @@ impl ImmediateWorker {
111
114
// Lock the mutex only for this write back, not the main computation.
112
115
// FIXME: we are only copying image data. Can we use some atomic backing buffer and a
113
116
// `Relaxed` write instead?
114
- let mut write_back = mutex. lock ( ) . unwrap ( ) ;
115
- let write_back = & mut write_back. results [ index] [ result_offset + y * line_stride + x..] ;
117
+ let write_back = & mut result_block[ y * line_stride + x..] ;
116
118
117
119
let buffered_lines = output_buffer. chunks_mut ( 8 ) ;
118
120
let back_lines = write_back. chunks_mut ( line_stride) ;
@@ -131,18 +133,29 @@ impl super::Worker for Scoped {
131
133
}
132
134
133
135
// Diff view of `append_row` (`-` = removed, `+` = added, bare numbers = old/new line numbers).
// Processes a single `(index, data)` row. The added version gathers everything the conversion
// needs up front -- the component's quantization table, its metadata, and the destination slice
// starting at `offsets[index]` -- and passes those to `ImmediateWorker::append_row_locked`
// instead of handing over `&self.inner`.
// Panics through `unwrap()` if `get_mut()` returns an error or if the quantization table or the
// component for `index` is missing; otherwise it always returns `Ok(())`.
// NOTE(review): `result_block` already begins at `result_offset`, and in the visible hunks the
// added `append_row_locked` indexes `result_block` without it -- check whether the
// `result_offset` argument is still needed.
fn append_row ( & mut self , row : ( usize , Vec < i16 > ) ) -> Result < ( ) > {
136
+ let quantization_table;
137
+ let metadata;
134
138
let ( index, data) = row;
135
139
let result_offset;
140
+ let result_block;
136
141
137
142
{
138
143
// NOTE(review): presumably `std::sync::Mutex::get_mut`, which uses the exclusive `&mut self`
// borrow instead of locking -- confirm against the declared type of `self.inner`.
let mut inner = self . inner . get_mut ( ) . unwrap ( ) ;
139
- let metadata = inner. component_metadata ( index) ;
144
+ quantization_table = inner. quantization_tables [ index] . as_ref ( ) . unwrap ( ) . clone ( ) ;
145
+ metadata = inner. component_metadata ( index) . unwrap ( ) ;
140
146
141
147
result_offset = inner. offsets [ index] ;
148
// The destination slice is taken at the current offset; the offset is advanced only afterwards.
+ result_block = & mut inner. results [ index] [ inner. offsets [ index] ..] ;
142
149
inner. offsets [ index] += metadata. bytes_used ( ) ;
143
150
}
144
151
145
- ImmediateWorker :: append_row_locked ( & self . inner , ( index, data) , result_offset) ;
152
+ ImmediateWorker :: append_row_locked (
153
+ quantization_table,
154
+ metadata,
155
+ ( index, data) ,
156
+ result_offset,
157
+ result_block,
158
+ ) ;
146
159
Ok ( ( ) )
147
160
}
148
161
@@ -153,18 +166,47 @@ impl super::Worker for Scoped {
153
166
154
167
// Magic sauce, these _may_ run in parallel.
155
168
fn append_rows ( & mut self , iter : & mut dyn Iterator < Item = ( usize , Vec < i16 > ) > ) -> Result < ( ) > {
169
+ let inner = self . inner . get_mut ( ) . unwrap ( ) ;
156
170
rayon:: in_place_scope ( |scope| {
157
- let mut inner = self . inner . lock ( ) . unwrap ( ) ;
171
+ let metadatas = [
172
+ inner. component_metadata ( 0 ) ,
173
+ inner. component_metadata ( 1 ) ,
174
+ inner. component_metadata ( 2 ) ,
175
+ inner. component_metadata ( 3 ) ,
176
+ ] ;
177
+
178
+ let inner = & mut * inner;
179
+ let [ res0, res1, res2, res3] = & mut inner. results ;
180
+
181
+ // Lazily get the blocks. Note: if we've already collected results from a component
182
+ // then the result vector has already been deallocated/taken. But no more tasks should
183
+ // be created for it.
184
+ let mut result_blocks = [
185
+ res0. get_mut ( inner. offsets [ 0 ] ..) . unwrap_or ( & mut [ ] ) ,
186
+ res1. get_mut ( inner. offsets [ 1 ] ..) . unwrap_or ( & mut [ ] ) ,
187
+ res2. get_mut ( inner. offsets [ 2 ] ..) . unwrap_or ( & mut [ ] ) ,
188
+ res3. get_mut ( inner. offsets [ 3 ] ..) . unwrap_or ( & mut [ ] ) ,
189
+ ] ;
190
+
158
191
// First we schedule everything, making sure their index is right etc.
159
192
for ( index, data) in iter {
160
- let metadata = inner. component_metadata ( index) ;
193
+ let metadata = metadatas[ index] . unwrap ( ) ;
194
+ let quantization_table = inner. quantization_tables [ index] . as_ref ( ) . unwrap ( ) . clone ( ) ;
161
195
162
196
let result_offset = inner. offsets [ index] ;
197
+ let ( result_block, tail) = core:: mem:: replace ( & mut result_blocks[ index] , & mut [ ] )
198
+ . split_at_mut ( metadata. bytes_used ( ) ) ;
199
+ result_blocks[ index] = tail;
163
200
inner. offsets [ index] += metadata. bytes_used ( ) ;
164
201
165
- let mutex = & self . inner ;
166
202
scope. spawn ( move |_| {
167
- ImmediateWorker :: append_row_locked ( mutex, ( index, data) , result_offset)
203
+ ImmediateWorker :: append_row_locked (
204
+ quantization_table,
205
+ metadata,
206
+ ( index, data) ,
207
+ result_offset,
208
+ result_block,
209
+ )
168
210
} ) ;
169
211
}
170
212
0 commit comments