Skip to content

Commit f9ce7e6

Browse files
committed
WIP
1 parent bd19a98 commit f9ce7e6

File tree

3 files changed

+115
-44
lines changed

3 files changed

+115
-44
lines changed

src/core/graphics/gpu_3d/registers_3d.rs

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ use crate::core::graphics::gpu::{DISPLAY_HEIGHT, DISPLAY_WIDTH};
44
use crate::core::memory::dma::DmaTransferMode;
55
use crate::core::CpuType::ARM9;
66
use crate::fixed_fifo::FixedFifo;
7-
use crate::math::{Matrix, Vectori16, Vectori32, Vectoru16};
7+
use crate::math::{Matrix, Vectorf32, Vectori16, Vectori32, Vectoru16};
88
use crate::utils::{rgb5_to_rgb6, HeapMem};
99
use bilge::prelude::*;
1010
use std::hint::unreachable_unchecked;
11-
use std::intrinsics::{unchecked_div, unlikely};
11+
use std::intrinsics::unlikely;
1212
use std::mem;
1313

1414
#[bitsize(32)]
@@ -232,25 +232,32 @@ pub struct Vertex {
232232
pub color: u32,
233233
}
234234

235-
fn intersect(v1: &Vectori32<4>, v2: &Vectori32<4>, val1: i32, val2: i32) -> Vectori32<4> {
236-
let d1 = val1 as i64 + v1[3] as i64;
237-
let d2 = val2 as i64 + v2[3] as i64;
238-
if d2 == d1 {
235+
fn intersect(v1: &Vectorf32<4>, v2: &Vectorf32<4>, val1: f32, val2: f32) -> Vectorf32<4> {
236+
let d1 = val1 + v1[3];
237+
let d2 = val2 + v2[3];
238+
if (d2 - d1).abs() < f32::EPSILON {
239239
return *v1;
240240
}
241241

242-
let mut vertex = Vectori32::default();
243-
for i in 0..4 {
244-
vertex[i] = v1[i] + unsafe { unchecked_div((v2[i] - v1[i]) as i64 * -d1, d2 - d1) } as i32;
245-
}
242+
let mut vertex = Vectorf32::default();
243+
let dist_inverse = -d1 / (d2 - d1);
244+
vertex[0] = v1[0] + ((v2[0] - v1[0]) * dist_inverse);
245+
vertex[1] = v1[1] + ((v2[1] - v1[1]) * dist_inverse);
246+
vertex[2] = v1[2] + ((v2[2] - v1[2]) * dist_inverse);
247+
vertex[3] = v1[3] + ((v2[3] - v1[3]) * dist_inverse);
246248
vertex
247249
}
248250

249-
fn clip_polygon(unclipped: &[Vectori32<4>; 4], clipped: &mut [Vectori32<4>; 10], size: &mut usize) -> bool {
251+
fn clip_polygon(unclipped: &[Vectori32<4>; 4], clipped: &mut [Vectorf32<4>; 10], size: &mut usize) -> bool {
250252
let mut clip = false;
251253

252-
let mut vertices = [Vectori32::<4>::default(); 10];
253-
vertices[..4].copy_from_slice(unclipped);
254+
let mut vertices = [Vectorf32::<4>::default(); 10];
255+
for i in 0..4 {
256+
for j in 0..4 {
257+
const NORMALIZE: f32 = 1f32 / 4096f32;
258+
vertices[i][j] = unclipped[i][j] as f32 * NORMALIZE;
259+
}
260+
}
254261

255262
for i in 0..6 {
256263
let old_size = *size;
@@ -1053,7 +1060,7 @@ impl Gpu3DRegisters {
10531060

10541061
for i in 0..6 {
10551062
let mut size = 4;
1056-
let mut clipped = [Vectori32::<4>::default(); 10];
1063+
let mut clipped = [Vectorf32::<4>::default(); 10];
10571064

10581065
clip_polygon(&faces[i], &mut clipped, &mut size);
10591066

@@ -1222,7 +1229,7 @@ impl Gpu3DRegisters {
12221229
self.clockwise = !self.clockwise;
12231230
}
12241231

1225-
let mut clipped = [Vectori32::<4>::default(); 10];
1232+
let mut clipped = [Vectorf32::<4>::default(); 10];
12261233
let cull = (!self.render_front && dot > 0) || (!self.render_back && dot < 0);
12271234
let mut clipped_size = self.saved_polygon.size;
12281235
let clip = if cull { false } else { clip_polygon(&unclipped, &mut clipped, &mut clipped_size) };

src/jit/jit_memory.rs

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ use crate::jit::reg::Reg;
1111
use crate::jit::Cond;
1212
use crate::logging::debug_println;
1313
use crate::mmap::{flush_icache, Mmap, PAGE_SIZE};
14+
use crate::utils;
1415
use crate::utils::{HeapMem, HeapMemU8};
15-
use crate::{utils, IS_DEBUG};
1616
use paste::paste;
1717
use std::intrinsics::unlikely;
1818
use std::marker::ConstParamTy;
@@ -94,8 +94,6 @@ pub struct JitLiveRanges {
9494
pub vram_arm7: HeapMemU8<{ (vram::ARM7_SIZE / JIT_LIVE_RANGE_PAGE_SIZE / 8) as usize }>,
9595
}
9696

97-
const JIT_PERF_MAP_RECORD: bool = IS_DEBUG;
98-
9997
#[cfg(target_os = "linux")]
10098
struct JitPerfMapRecord {
10199
common_records: Vec<(usize, usize, String)>,
@@ -115,27 +113,21 @@ impl JitPerfMapRecord {
115113
}
116114

117115
fn record_common(&mut self, jit_start: usize, jit_size: usize, name: impl AsRef<str>) {
118-
if JIT_PERF_MAP_RECORD {
119-
self.common_records.push((jit_start, jit_size, name.as_ref().to_string()));
120-
use std::io::Write;
121-
writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {}", name.as_ref()).unwrap();
122-
}
116+
self.common_records.push((jit_start, jit_size, name.as_ref().to_string()));
117+
use std::io::Write;
118+
writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {}", name.as_ref()).unwrap();
123119
}
124120

125121
fn record(&mut self, jit_start: usize, jit_size: usize, guest_pc: u32, cpu_type: CpuType) {
126-
if JIT_PERF_MAP_RECORD {
127-
use std::io::Write;
128-
writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {cpu_type:?}_{guest_pc:x}").unwrap();
129-
}
122+
use std::io::Write;
123+
writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {cpu_type:?}_{guest_pc:x}").unwrap();
130124
}
131125

132126
fn reset(&mut self) {
133-
if JIT_PERF_MAP_RECORD {
134-
self.perf_map = std::fs::File::create(&self.perf_map_path).unwrap();
135-
for (jit_start, jit_size, name) in &self.common_records {
136-
use std::io::Write;
137-
writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {name}").unwrap();
138-
}
127+
self.perf_map = std::fs::File::create(&self.perf_map_path).unwrap();
128+
for (jit_start, jit_size, name) in &self.common_records {
129+
use std::io::Write;
130+
writeln!(self.perf_map, "{jit_start:x} {jit_size:x} {name}").unwrap();
139131
}
140132
}
141133
}

src/math.rs

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use paste::paste;
2+
use std::arch::asm;
3+
use std::fmt::{Debug, Formatter};
4+
use std::ops;
25
use std::ops::{Index, IndexMut};
3-
use std::{mem, ops};
46

57
#[derive(Copy, Clone)]
68
/// Matrix stored as 16 contiguous `i32` elements.
///
/// The vector-by-matrix multiply in this file reads element `row * 4 + col`
/// and shifts each accumulated product right by 12 bits.
pub struct Matrix([i32; 16]);
@@ -23,6 +25,16 @@ impl ops::Mul for Matrix {
2325
}
2426
}
2527

28+
impl Debug for Matrix {
29+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
30+
let mut debug = f.debug_list();
31+
for n in &self.0 {
32+
debug.entry(n);
33+
}
34+
debug.finish()
35+
}
36+
}
37+
2638
impl AsRef<[i32; 16]> for Matrix {
2739
fn as_ref(&self) -> &[i32; 16] {
2840
&self.0
@@ -69,7 +81,7 @@ macro_rules! define_vector {
6981

7082
impl<const SIZE: usize> Default for [<Vector $t>]<SIZE> {
    /// Builds a vector whose `SIZE` components all hold the element type's default value.
    fn default() -> Self {
        let components = [$t::default(); SIZE];
        Self(components)
    }
}
7587

@@ -109,7 +121,7 @@ macro_rules! define_vector {
109121
self
110122
}
111123
}
112-
124+
113125
impl From<[<Vector $t>]<3>> for [<Vector $t>]<4> {
114126
fn from(value: [<Vector $t>]<3>) -> Self {
115127
let mut ret = Self::default();
@@ -119,13 +131,24 @@ macro_rules! define_vector {
119131
ret
120132
}
121133
}
134+
135+
impl<const SIZE: usize> Debug for [<Vector $t>]<SIZE> {
    /// Formats the vector as a list of its components in index order.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_list().entries(self.0.iter()).finish()
    }
}
122144
}
123145
};
124146
}
125147

126148
define_vector!(u16);
127149
define_vector!(i16);
128150
define_vector!(i32);
151+
define_vector!(f32);
129152

130153
impl ops::Mul<Matrix> for Vectori32<3> {
131154
type Output = Self;
@@ -144,10 +167,33 @@ impl ops::Mul<Matrix> for Vectori32<4> {
144167

145168
fn mul(self, rhs: Matrix) -> Self::Output {
146169
let mut ret = Vectori32::default();
147-
ret[0] = ((self[0] as i64 * rhs[0] as i64 + self[1] as i64 * rhs[4] as i64 + self[2] as i64 * rhs[8] as i64 + self[3] as i64 * rhs[12] as i64) >> 12) as i32;
148-
ret[1] = ((self[0] as i64 * rhs[1] as i64 + self[1] as i64 * rhs[5] as i64 + self[2] as i64 * rhs[9] as i64 + self[3] as i64 * rhs[13] as i64) >> 12) as i32;
149-
ret[2] = ((self[0] as i64 * rhs[2] as i64 + self[1] as i64 * rhs[6] as i64 + self[2] as i64 * rhs[10] as i64 + self[3] as i64 * rhs[14] as i64) >> 12) as i32;
150-
ret[3] = ((self[0] as i64 * rhs[3] as i64 + self[1] as i64 * rhs[7] as i64 + self[2] as i64 * rhs[11] as i64 + self[3] as i64 * rhs[15] as i64) >> 12) as i32;
170+
unsafe {
171+
asm!(
172+
"vld1.s32 {{q0}}, [{v}]",
173+
"vld1.s32 {{q1}}, [{m}]!",
174+
"vld1.s32 {{q2}}, [{m}]!",
175+
"vld1.s32 {{q3}}, [{m}]!",
176+
"vld1.s32 {{q4}}, [{m}]",
177+
"vmull.s32 q5, d2, d0[0]",
178+
"vmlal.s32 q5, d4, d0[1]",
179+
"vmlal.s32 q5, d6, d1[0]",
180+
"vmlal.s32 q5, d8, d1[1]",
181+
"vmull.s32 q6, d3, d0[0]",
182+
"vmlal.s32 q6, d5, d0[1]",
183+
"vmlal.s32 q6, d7, d1[0]",
184+
"vmlal.s32 q6, d9, d1[1]",
185+
"vshr.s64 q5, q5, 12",
186+
"vshr.s64 q6, q6, 12",
187+
"vstr.32 s20, [{ret}]",
188+
"vstr.32 s22, [{ret}, 4]",
189+
"vstr.32 s24, [{ret}, 8]",
190+
"vstr.32 s26, [{ret}, 12]",
191+
v = in(reg) self.0.as_ptr(),
192+
m = in(reg) rhs.0.as_ptr(),
193+
ret = in(reg) ret.0.as_mut_ptr(),
194+
options(preserves_flags, nostack),
195+
);
196+
}
151197
ret
152198
}
153199
}
@@ -164,14 +210,40 @@ impl ops::MulAssign<Matrix> for Vectori32<4> {
164210
}
165211
}
166212

167-
impl<const SIZE: usize> ops::Mul for Vectori32<SIZE> {
213+
impl ops::Mul for Vectori32<3> {
168214
type Output = i32;
169215

170216
fn mul(self, rhs: Self) -> Self::Output {
217+
/* Vectorization of
171218
let mut dot = 0;
172-
for i in 0..SIZE {
173-
dot += self[i] as i64 * rhs[i] as i64;
174-
}
219+
dot += self[0] as i64 * rhs[0] as i64;
220+
dot += self[1] as i64 * rhs[1] as i64;
221+
dot += self[2] as i64 * rhs[2] as i64;
175222
(dot >> 12) as i32
223+
*/
224+
225+
let v1 = self.0.as_ptr();
226+
let v2 = rhs.0.as_ptr();
227+
let mut dot: i32;
228+
unsafe {
229+
asm!(
230+
"vmov.s32 d1, 0",
231+
"vmov.s32 d3, 0",
232+
"vld1.s32 {{d0}}, [{v1}]!",
233+
"vld1.s32 {{d1[0]}}, [{v1}]",
234+
"vld1.s32 {{d2}}, [{v2}]!",
235+
"vld1.s32 {{d3[0]}}, [{v2}]",
236+
"vmull.s32 q2, d0, d2",
237+
"vmlal.s32 q2, d1, d3",
238+
"vadd.s64 d4, d4, d5",
239+
"vshr.s64 d4, d4, 12",
240+
"vmov.s32 {dot}, d4[0]",
241+
v1 = in(reg) v1,
242+
v2 = in(reg) v2,
243+
dot = out(reg) dot,
244+
options(pure, readonly, preserves_flags, nostack),
245+
);
246+
}
247+
dot
176248
}
177249
}

0 commit comments

Comments
 (0)