Skip to content

Commit 98bee19

Browse files
committed
Rename "native" CPU feature level to "rust"
The term "native" is confusing: in GCC/Clang, "native" means using the highest optimization set possible, whereas we were using it to mean the opposite. This commit also renames all "native" modules accordingly and adds documentation on how to change the ASM level at runtime.
1 parent 2bf8918 commit 98bee19

File tree

27 files changed

+150
-145
lines changed

27 files changed

+150
-145
lines changed

CONTRIBUTING.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,13 @@ tail -n+2 dec.y4m > dec
118118
```
119119
cmp rec dec
120120
```
121+
122+
## Setting Assembly Optimization Level
123+
124+
rav1e defaults to using the highest assembly optimization level supported on the current machine.
125+
You can disable assembly or use a lower assembly target at runtime by setting the environment variable `RAV1E_CPU_TARGET`.
126+
127+
For example, `RAV1E_CPU_TARGET=rust` will disable all hand-written assembly optimizations.
128+
`RAV1E_CPU_TARGET=sse2` will enable SSE2 code but disable any newer assembly.
129+
130+
A full list of options for your platform can be found in the `CpuFeatureLevel` enum in `src/cpu_features`.

src/asm/aarch64/mc.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,16 @@ pub fn put_8tap<T: Pixel>(
8787
height: usize, col_frac: i32, row_frac: i32, mode_x: FilterMode,
8888
mode_y: FilterMode, bit_depth: usize, cpu: CpuFeatureLevel,
8989
) {
90-
let call_native = |dst: &mut PlaneRegionMut<'_, T>| {
91-
native::put_8tap(
90+
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
91+
rust::put_8tap(
9292
dst, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth,
9393
cpu,
9494
);
9595
};
9696
#[cfg(feature = "check_asm")]
9797
let ref_dst = {
9898
let mut copy = dst.scratch_copy();
99-
call_native(&mut copy.as_region_mut());
99+
call_rust(&mut copy.as_region_mut());
100100
copy
101101
};
102102
match T::type_enum() {
@@ -114,7 +114,7 @@ pub fn put_8tap<T: Pixel>(
114114
row_frac,
115115
);
116116
},
117-
None => call_native(dst),
117+
None => call_rust(dst),
118118
}
119119
}
120120
PixelType::U16 => {
@@ -132,7 +132,7 @@ pub fn put_8tap<T: Pixel>(
132132
bit_depth as i32,
133133
);
134134
},
135-
None => call_native(dst),
135+
None => call_rust(dst),
136136
}
137137
}
138138
}
@@ -153,8 +153,8 @@ pub fn prep_8tap<T: Pixel>(
153153
col_frac: i32, row_frac: i32, mode_x: FilterMode, mode_y: FilterMode,
154154
bit_depth: usize, cpu: CpuFeatureLevel,
155155
) {
156-
let call_native = |tmp: &mut [i16]| {
157-
native::prep_8tap(
156+
let call_rust = |tmp: &mut [i16]| {
157+
rust::prep_8tap(
158158
tmp, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth,
159159
cpu,
160160
);
@@ -163,7 +163,7 @@ pub fn prep_8tap<T: Pixel>(
163163
let ref_tmp = {
164164
let mut copy = vec![0; width * height];
165165
copy[..].copy_from_slice(&tmp[..width * height]);
166-
call_native(&mut copy);
166+
call_rust(&mut copy);
167167
copy
168168
};
169169
match T::type_enum() {
@@ -180,7 +180,7 @@ pub fn prep_8tap<T: Pixel>(
180180
row_frac,
181181
);
182182
},
183-
None => call_native(tmp),
183+
None => call_rust(tmp),
184184
}
185185
}
186186
PixelType::U16 => {
@@ -197,7 +197,7 @@ pub fn prep_8tap<T: Pixel>(
197197
bit_depth as i32,
198198
);
199199
},
200-
None => call_native(tmp),
200+
None => call_rust(tmp),
201201
}
202202
}
203203
}
@@ -211,13 +211,13 @@ pub fn mc_avg<T: Pixel>(
211211
dst: &mut PlaneRegionMut<'_, T>, tmp1: &[i16], tmp2: &[i16], width: usize,
212212
height: usize, bit_depth: usize, cpu: CpuFeatureLevel,
213213
) {
214-
let call_native = |dst: &mut PlaneRegionMut<'_, T>| {
215-
native::mc_avg(dst, tmp1, tmp2, width, height, bit_depth, cpu);
214+
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
215+
rust::mc_avg(dst, tmp1, tmp2, width, height, bit_depth, cpu);
216216
};
217217
#[cfg(feature = "check_asm")]
218218
let ref_dst = {
219219
let mut copy = dst.scratch_copy();
220-
call_native(&mut copy.as_region_mut());
220+
call_rust(&mut copy.as_region_mut());
221221
copy
222222
};
223223
match T::type_enum() {
@@ -232,7 +232,7 @@ pub fn mc_avg<T: Pixel>(
232232
height as i32,
233233
);
234234
},
235-
None => call_native(dst),
235+
None => call_rust(dst),
236236
},
237237
PixelType::U16 => match AVG_HBD_FNS[cpu.as_index()] {
238238
Some(func) => unsafe {
@@ -246,7 +246,7 @@ pub fn mc_avg<T: Pixel>(
246246
bit_depth as i32,
247247
);
248248
},
249-
None => call_native(dst),
249+
None => call_rust(dst),
250250
},
251251
}
252252
#[cfg(feature = "check_asm")]

src/asm/aarch64/predict.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
use crate::context::MAX_TX_SIZE;
1111
use crate::cpu_features::CpuFeatureLevel;
1212
use crate::predict::{
13-
native, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
13+
rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
1414
};
1515
use crate::tiling::PlaneRegionMut;
1616
use crate::transform::TxSize;
@@ -73,15 +73,15 @@ pub fn dispatch_predict_intra<T: Pixel>(
7373
ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
7474
edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
7575
) {
76-
let call_native = |dst: &mut PlaneRegionMut<'_, T>| {
77-
native::dispatch_predict_intra(
76+
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
77+
rust::dispatch_predict_intra(
7878
mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf,
7979
cpu,
8080
);
8181
};
8282

8383
if size_of::<T>() != 1 {
84-
return call_native(dst);
84+
return call_rust(dst);
8585
}
8686

8787
unsafe {
@@ -130,10 +130,10 @@ pub fn dispatch_predict_intra<T: Pixel>(
130130
PredictionVariant::BOTH => rav1e_ipred_cfl_neon,
131131
})(dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle);
132132
}
133-
_ => call_native(dst),
133+
_ => call_rust(dst),
134134
}
135135
} else {
136-
call_native(dst);
136+
call_rust(dst);
137137
}
138138
}
139139
}

src/asm/aarch64/transform/inverse.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pub fn inverse_transform_add<T: Pixel>(
3939
PixelType::U16 => {}
4040
};
4141

42-
native::inverse_transform_add(input, output, eob, tx_size, tx_type, bd, cpu);
42+
rust::inverse_transform_add(input, output, eob, tx_size, tx_type, bd, cpu);
4343
}
4444

4545
macro_rules! decl_itx_fns {

src/asm/shared/transform/inverse.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,11 @@ pub mod test {
128128
tx_size,
129129
tx_type,
130130
8,
131-
CpuFeatureLevel::NATIVE,
131+
CpuFeatureLevel::RUST,
132132
);
133133

134134
let eob: usize = pick_eob(freq, tx_size, tx_type, sub_h);
135-
let mut native_dst = dst.clone();
135+
let mut rust_dst = dst.clone();
136136

137137
inverse_transform_add(
138138
freq,
@@ -145,14 +145,14 @@ pub mod test {
145145
);
146146
inverse_transform_add(
147147
freq,
148-
&mut native_dst.as_region_mut(),
148+
&mut rust_dst.as_region_mut(),
149149
eob,
150150
tx_size,
151151
tx_type,
152152
8,
153-
CpuFeatureLevel::NATIVE,
153+
CpuFeatureLevel::RUST,
154154
);
155-
assert_eq!(native_dst.data_origin(), dst.data_origin());
155+
assert_eq!(rust_dst.data_origin(), dst.data_origin());
156156
}
157157
}
158158
}

src/asm/x86/cdef.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ pub(crate) unsafe fn cdef_filter_block<T: Pixel>(
4646
pri_strength: i32, sec_strength: i32, dir: usize, damping: i32,
4747
bit_depth: usize, xdec: usize, ydec: usize, cpu: CpuFeatureLevel,
4848
) {
49-
let call_native = |dst: &mut PlaneRegionMut<T>| {
50-
native::cdef_filter_block(
49+
let call_rust = |dst: &mut PlaneRegionMut<T>| {
50+
rust::cdef_filter_block(
5151
dst,
5252
src,
5353
src_stride,
@@ -64,7 +64,7 @@ pub(crate) unsafe fn cdef_filter_block<T: Pixel>(
6464
#[cfg(feature = "check_asm")]
6565
let ref_dst = {
6666
let mut copy = dst.scratch_copy();
67-
call_native(&mut copy.as_region_mut());
67+
call_rust(&mut copy.as_region_mut());
6868
copy
6969
};
7070
match T::type_enum() {
@@ -82,7 +82,7 @@ pub(crate) unsafe fn cdef_filter_block<T: Pixel>(
8282
damping,
8383
);
8484
}
85-
None => call_native(dst),
85+
None => call_rust(dst),
8686
}
8787
}
8888
PixelType::U16 => {
@@ -100,7 +100,7 @@ pub(crate) unsafe fn cdef_filter_block<T: Pixel>(
100100
bit_depth as i32,
101101
);
102102
}
103-
None => call_native(dst),
103+
None => call_rust(dst),
104104
}
105105
}
106106
}
@@ -162,13 +162,13 @@ pub(crate) fn cdef_find_dir<T: Pixel>(
162162
img: &PlaneSlice<'_, u16>, var: &mut u32, coeff_shift: usize,
163163
cpu: CpuFeatureLevel,
164164
) -> i32 {
165-
let call_native =
166-
|var: &mut u32| native::cdef_find_dir::<T>(img, var, coeff_shift, cpu);
165+
let call_rust =
166+
|var: &mut u32| rust::cdef_find_dir::<T>(img, var, coeff_shift, cpu);
167167

168168
#[cfg(feature = "check_asm")]
169169
let (ref_dir, ref_var) = {
170170
let mut var: u32 = 0;
171-
let dir = call_native(&mut var);
171+
let dir = call_rust(&mut var);
172172
(dir, var)
173173
};
174174

@@ -186,10 +186,10 @@ pub(crate) fn cdef_find_dir<T: Pixel>(
186186
)
187187
}
188188
} else {
189-
call_native(var)
189+
call_rust(var)
190190
}
191191
}
192-
PixelType::U16 => call_native(var),
192+
PixelType::U16 => call_rust(var),
193193
};
194194

195195
#[cfg(feature = "check_asm")]
@@ -250,7 +250,7 @@ mod test {
250250
*s = random::<u8>() as u16;
251251
*d = random::<u8>();
252252
}
253-
let mut native_dst = dst.clone();
253+
let mut rust_dst = dst.clone();
254254

255255
let src_stride = width as isize;
256256
let pri_strength = 1;
@@ -260,8 +260,8 @@ mod test {
260260

261261
unsafe {
262262
cdef_filter_block(&mut dst.as_region_mut(), src.as_ptr(), src_stride, pri_strength, sec_strength, dir, damping, bit_depth, $XDEC, $YDEC, CpuFeatureLevel::from_str($OPTLIT).unwrap());
263-
cdef_filter_block(&mut native_dst.as_region_mut(), src.as_ptr(), src_stride, pri_strength, sec_strength, dir, damping, bit_depth, $XDEC, $YDEC, CpuFeatureLevel::NATIVE);
264-
assert_eq!(native_dst.data_origin(), dst.data_origin());
263+
cdef_filter_block(&mut rust_dst.as_region_mut(), src.as_ptr(), src_stride, pri_strength, sec_strength, dir, damping, bit_depth, $XDEC, $YDEC, CpuFeatureLevel::RUST);
264+
assert_eq!(rust_dst.data_origin(), dst.data_origin());
265265
}
266266
}
267267
}

src/asm/x86/dist/mod.rs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,10 @@ pub fn get_sad<T: Pixel>(
122122
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bsize: BlockSize,
123123
bit_depth: usize, cpu: CpuFeatureLevel,
124124
) -> u32 {
125-
let call_native =
126-
|| -> u32 { native::get_sad(dst, src, bsize, bit_depth, cpu) };
125+
let call_rust = || -> u32 { rust::get_sad(dst, src, bsize, bit_depth, cpu) };
127126

128127
#[cfg(feature = "check_asm")]
129-
let ref_dist = call_native();
128+
let ref_dist = call_rust();
130129

131130
let dist = match T::type_enum() {
132131
PixelType::U8 => match SAD_FNS[cpu.as_index()][to_index(bsize)] {
@@ -138,7 +137,7 @@ pub fn get_sad<T: Pixel>(
138137
T::to_asm_stride(dst.plane_cfg.stride),
139138
)
140139
},
141-
None => call_native(),
140+
None => call_rust(),
142141
},
143142
PixelType::U16 => match SAD_HBD_FNS[cpu.as_index()][to_index(bsize)] {
144143
Some(func) => unsafe {
@@ -149,7 +148,7 @@ pub fn get_sad<T: Pixel>(
149148
T::to_asm_stride(dst.plane_cfg.stride),
150149
)
151150
},
152-
None => call_native(),
151+
None => call_rust(),
153152
},
154153
};
155154

@@ -165,11 +164,11 @@ pub fn get_satd<T: Pixel>(
165164
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bsize: BlockSize,
166165
bit_depth: usize, cpu: CpuFeatureLevel,
167166
) -> u32 {
168-
let call_native =
169-
|| -> u32 { native::get_satd(dst, src, bsize, bit_depth, cpu) };
167+
let call_rust =
168+
|| -> u32 { rust::get_satd(dst, src, bsize, bit_depth, cpu) };
170169

171170
#[cfg(feature = "check_asm")]
172-
let ref_dist = call_native();
171+
let ref_dist = call_rust();
173172

174173
let dist = match T::type_enum() {
175174
PixelType::U8 => match SATD_FNS[cpu.as_index()][to_index(bsize)] {
@@ -181,7 +180,7 @@ pub fn get_satd<T: Pixel>(
181180
T::to_asm_stride(dst.plane_cfg.stride),
182181
)
183182
},
184-
None => call_native(),
183+
None => call_rust(),
185184
},
186185
PixelType::U16 => match SATD_HBD_FNS[cpu.as_index()][to_index(bsize)] {
187186
// Because these are Rust intrinsics, don't use `T::to_asm_stride`.
@@ -193,7 +192,7 @@ pub fn get_satd<T: Pixel>(
193192
dst.plane_cfg.stride as isize,
194193
)
195194
},
196-
None => call_native(),
195+
None => call_rust(),
197196
},
198197
};
199198

@@ -501,9 +500,9 @@ mod test {
501500
*d = random::<u8>() as u16 * $BD / 8;
502501
}
503502
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), bsize, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
504-
let native_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), bsize, $BD, CpuFeatureLevel::NATIVE);
503+
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), bsize, $BD, CpuFeatureLevel::RUST);
505504

506-
assert_eq!(native_result, result);
505+
assert_eq!(rust_result, result);
507506
} else {
508507
// dynamic allocation: test
509508
let mut src = Plane::from_slice(&vec![0u8; $W * $H], $W);
@@ -514,9 +513,9 @@ mod test {
514513
*d = random::<u8>();
515514
}
516515
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), bsize, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
517-
let native_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), bsize, $BD, CpuFeatureLevel::NATIVE);
516+
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), bsize, $BD, CpuFeatureLevel::RUST);
518517

519-
assert_eq!(native_result, result);
518+
assert_eq!(rust_result, result);
520519
}
521520
}
522521
}

0 commit comments

Comments
 (0)