Skip to content

Commit 3fb99cf

Browse files
committed
Optimize multiple io operations
* DMA optimizations
* LDM/STM optimizations
* Use FIFO with smaller overhead
* Cache div and sqrt results
1 parent dacdb66 commit 3fb99cf

File tree

17 files changed

+861
-266
lines changed

17 files changed

+861
-266
lines changed

.cargo/config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[target.armv7-unknown-linux-gnueabihf]
22
linker = "/usr/bin/clang"
3-
rustflags = ["-C", "link-args=-target armv7-unknown-linux-gnueabihf -fuse-ld=lld", "-C", "target-feature=+neon,+thumb-mode,+thumb2,+v5te,+v6,+v6k,+v6t2,+v7,+vfp2,+vfp3"]
3+
rustflags = ["-C", "link-args=-target armv7-unknown-linux-gnueabihf -fuse-ld=lld", "-C", "target-feature=+neon,+thumb-mode,+thumb2,+v5te,+v6,+v6k,+v6t2,+v7,+vfp2,+vfp3", "-Clink-arg=-Wl,--no-rosegment"]

src/core/cycle_manager.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,19 @@ impl CycleManager {
8080
pub fn schedule(&mut self, in_cycles: u32, event_type: EventType) -> u64 {
8181
debug_assert_ne!(in_cycles, 0);
8282
let event_cycle = self.cycle_count + in_cycles as u64;
83-
let mut index = self.events.len();
84-
for i in 0..self.events.len() {
85-
let (cycles, _) = unsafe { self.events.get(i).unwrap_unchecked() };
86-
if *cycles > event_cycle {
87-
index = i;
88-
break;
83+
if in_cycles == 1 {
84+
self.events.insert(0, (event_cycle, event_type))
85+
} else {
86+
let mut index = self.events.len();
87+
for i in 0..self.events.len() {
88+
let (cycles, _) = unsafe { self.events.get(i).unwrap_unchecked() };
89+
if *cycles > event_cycle {
90+
index = i;
91+
break;
92+
}
8993
}
94+
self.events.insert(index, (event_cycle, event_type));
9095
}
91-
self.events.insert(index, (event_cycle, event_type));
9296
event_cycle
9397
}
9498

src/core/div_sqrt.rs

Lines changed: 44 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1+
use std::hint::unreachable_unchecked;
2+
use std::intrinsics::{unchecked_div, unchecked_rem};
3+
14
pub struct DivSqrt {
25
pub sqrt_cnt: u16,
3-
pub sqrt_result: u32,
6+
sqrt_result: u32,
47
sqrt_param: u64,
8+
sqrt_dirty: bool,
59
pub div_cnt: u16,
610
div_numer: i64,
711
div_denom: i64,
812
div_result: i64,
913
divrem_result: i64,
14+
div_dirty: bool,
1015
}
1116

1217
impl DivSqrt {
@@ -15,11 +20,13 @@ impl DivSqrt {
1520
sqrt_cnt: 0,
1621
sqrt_result: 0,
1722
sqrt_param: 0,
23+
sqrt_dirty: true,
1824
div_cnt: 0,
1925
div_numer: 0,
2026
div_denom: 0,
2127
div_result: 0,
2228
divrem_result: 0,
29+
div_dirty: true,
2330
}
2431
}
2532

@@ -31,7 +38,16 @@ impl DivSqrt {
3138
(self.sqrt_param >> 32) as u32
3239
}
3340

41+
pub fn get_sqrt_result(&mut self) -> u32 {
42+
self.sqrt();
43+
self.sqrt_result
44+
}
45+
3446
fn sqrt(&mut self) {
47+
if !self.sqrt_dirty {
48+
return;
49+
}
50+
self.sqrt_dirty = false;
3551
if self.sqrt_cnt & 1 == 0 {
3652
self.sqrt_result = (self.sqrt_param as u32).isqrt();
3753
} else {
@@ -42,17 +58,17 @@ impl DivSqrt {
4258
pub fn set_sqrt_cnt(&mut self, mut mask: u16, value: u16) {
4359
mask &= 0x1;
4460
self.sqrt_cnt = (self.sqrt_cnt & !mask) | (value & mask);
45-
self.sqrt();
61+
self.sqrt_dirty = true;
4662
}
4763

4864
pub fn set_sqrt_param_l(&mut self, mask: u32, value: u32) {
4965
self.sqrt_param = (self.sqrt_param & !(mask as u64)) | (value & mask) as u64;
50-
self.sqrt();
66+
self.sqrt_dirty = true;
5167
}
5268

5369
pub fn set_sqrt_param_h(&mut self, mask: u32, value: u32) {
5470
self.sqrt_param = (self.sqrt_param & !((mask as u64) << 32)) | (((value & mask) as u64) << 32);
55-
self.sqrt();
71+
self.sqrt_dirty = true;
5672
}
5773

5874
pub fn get_div_numer_l(&self) -> u32 {
@@ -71,23 +87,31 @@ impl DivSqrt {
7187
(self.div_denom as u64 >> 32) as u32
7288
}
7389

74-
pub fn get_div_result_l(&self) -> u32 {
90+
pub fn get_div_result_l(&mut self) -> u32 {
91+
self.div();
7592
self.div_result as u32
7693
}
7794

78-
pub fn get_div_result_h(&self) -> u32 {
95+
pub fn get_div_result_h(&mut self) -> u32 {
96+
self.div();
7997
(self.div_result as u64 >> 32) as u32
8098
}
8199

82-
pub fn get_divrem_result_l(&self) -> u32 {
100+
pub fn get_divrem_result_l(&mut self) -> u32 {
101+
self.div();
83102
self.divrem_result as u32
84103
}
85104

86-
pub fn get_divrem_result_h(&self) -> u32 {
105+
pub fn get_divrem_result_h(&mut self) -> u32 {
106+
self.div();
87107
(self.divrem_result as u64 >> 32) as u32
88108
}
89109

90110
fn div(&mut self) {
111+
if !self.div_dirty {
112+
return;
113+
}
114+
self.div_dirty = false;
91115
if self.div_denom == 0 {
92116
self.div_cnt |= 1 << 14;
93117
} else {
@@ -98,74 +122,48 @@ impl DivSqrt {
98122
0 => {
99123
let num = self.div_numer as i32;
100124
let denom = self.div_denom as i32;
101-
if num == i32::MIN && denom == -1 {
102-
self.div_result = (num as u64 ^ ((!0u32 as u64) << 32)) as i64;
103-
self.divrem_result = 0;
104-
} else if denom != 0 {
105-
self.div_result = (num / denom) as i64;
106-
self.divrem_result = (num % denom) as i64;
107-
} else {
108-
self.div_result = (if num < 0 { 1 } else { -1i32 } as u64 ^ ((!0u32 as u64) << 32)) as i64;
109-
self.divrem_result = num as i64;
110-
}
125+
self.div_result = unsafe { unchecked_div(num, denom) } as i64;
126+
self.divrem_result = unsafe { unchecked_rem(num, denom) } as i64;
111127
}
112128
1 => {
113129
let num = self.div_numer;
114130
let denom = self.div_denom as i32;
115-
if num == i64::MIN && denom == -1 {
116-
self.div_result = num;
117-
self.divrem_result = 0;
118-
} else if denom != 0 {
119-
self.div_result = num / denom as i64;
120-
self.divrem_result = num % denom as i64;
121-
} else {
122-
self.div_result = if num < 0 { 1 } else { -1 };
123-
self.divrem_result = num;
124-
}
131+
self.div_result = unsafe { unchecked_div(num, denom as i64) };
132+
self.divrem_result = unsafe { unchecked_rem(num, denom as i64) };
125133
}
126134
2 => {
127135
let num = self.div_numer;
128136
let denom = self.div_denom;
129-
if num == i64::MIN && denom == -1 {
130-
self.div_result = num;
131-
self.divrem_result = 0;
132-
} else if denom != 0 {
133-
self.div_result = num / denom;
134-
self.divrem_result = num % denom;
135-
} else {
136-
self.div_result = if num < 0 { 1 } else { -1 };
137-
self.divrem_result = num;
138-
}
139-
}
140-
_ => {
141-
unreachable!()
137+
self.div_result = unsafe { unchecked_div(num, denom) };
138+
self.divrem_result = unsafe { unchecked_rem(num, denom) };
142139
}
140+
_ => unsafe { unreachable_unchecked() },
143141
}
144142
}
145143

146144
pub fn set_div_cnt(&mut self, mut mask: u16, value: u16) {
147145
mask &= 0x3;
148146
self.div_cnt = (self.div_cnt & !mask) | (value & mask);
149-
self.div();
147+
self.div_dirty = true;
150148
}
151149

152150
pub fn set_div_numer_l(&mut self, mask: u32, value: u32) {
153151
self.div_numer = ((self.div_numer as u64 & !(mask as u64)) | (value & mask) as u64) as i64;
154-
self.div();
152+
self.div_dirty = true;
155153
}
156154

157155
pub fn set_div_numer_h(&mut self, mask: u32, value: u32) {
158156
self.div_numer = ((self.div_numer as u64 & !((mask as u64) << 32)) | (((value & mask) as u64) << 32)) as i64;
159-
self.div();
157+
self.div_dirty = true;
160158
}
161159

162160
pub fn set_div_denom_l(&mut self, mask: u32, value: u32) {
163161
self.div_denom = ((self.div_denom as u64 & !(mask as u64)) | (value & mask) as u64) as i64;
164-
self.div();
162+
self.div_dirty = true;
165163
}
166164

167165
pub fn set_div_denom_h(&mut self, mask: u32, value: u32) {
168166
self.div_denom = ((self.div_denom as u64 & !((mask as u64) << 32)) | (((value & mask) as u64) << 32)) as i64;
169-
self.div();
167+
self.div_dirty = true;
170168
}
171169
}

src/core/graphics/gpu_3d/registers_3d.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ use crate::core::emu::{get_cm_mut, get_cpu_regs_mut, get_mem_mut, io_dma, Emu};
33
use crate::core::graphics::gpu::{DISPLAY_HEIGHT, DISPLAY_WIDTH};
44
use crate::core::memory::dma::DmaTransferMode;
55
use crate::core::CpuType::ARM9;
6+
use crate::fixed_fifo::FixedFifo;
67
use crate::math::{Matrix, Vectori16, Vectori32, Vectoru16};
78
use crate::utils::{rgb5_to_rgb6, HeapMem};
89
use bilge::prelude::*;
9-
use std::collections::VecDeque;
1010
use std::hint::unreachable_unchecked;
11+
use std::intrinsics::unlikely;
1112
use std::mem;
1213

1314
#[bitsize(32)]
@@ -157,7 +158,7 @@ const FIFO_PARAM_COUNTS: [u8; 128] = [
157158
3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70-0x7F
158159
];
159160

160-
#[derive(Copy, Clone)]
161+
#[derive(Copy, Clone, Default)]
161162
struct Entry {
162163
cmd: u8,
163164
param: u32,
@@ -257,7 +258,11 @@ fn clip_polygon(unclipped: &[Vectori32<4>; 4], clipped: &mut [Vectori32<4>; 10],
257258

258259
for j in 0..old_size {
259260
let current = &vertices[j];
260-
let previous = &vertices[(j.wrapping_sub(1).wrapping_add(old_size)) % old_size];
261+
let mut previous_index = j + 1;
262+
if unlikely(previous_index == old_size) {
263+
previous_index = 0;
264+
}
265+
let previous = &vertices[previous_index];
261266

262267
let (current_val, previous_val) = match i {
263268
0 => (current[0], previous[0]),
@@ -390,7 +395,7 @@ pub struct Polygons {
390395

391396
#[derive(Default)]
392397
pub struct Gpu3DRegisters {
393-
cmd_fifo: VecDeque<Entry>,
398+
cmd_fifo: FixedFifo<Entry, 512>,
394399
cmd_pipe_size: u8,
395400
test_queue: u32,
396401
mtx_queue: u32,
@@ -496,16 +501,17 @@ impl Gpu3DRegisters {
496501

497502
while !self.cmd_fifo.is_empty() && executed_cycles < cycle_diff && !self.flushed {
498503
let mut params = Vec::new();
499-
let entry = unsafe { *self.cmd_fifo.front().unwrap_unchecked() };
500-
let mut param_count = FIFO_PARAM_COUNTS[entry.cmd as usize];
504+
let entry = unsafe { *self.cmd_fifo.front_unchecked() };
505+
let mut param_count = unsafe { *FIFO_PARAM_COUNTS.get_unchecked(entry.cmd as usize) };
501506
if param_count > 1 {
502507
if param_count as usize > self.cmd_fifo.len() {
503508
break;
504509
}
505510

506511
params.reserve(param_count as usize);
507512
for _ in 0..param_count {
508-
params.push(unsafe { self.cmd_fifo.pop_front().unwrap_unchecked().param });
513+
params.push(unsafe { self.cmd_fifo.front_unchecked().param });
514+
self.cmd_fifo.pop_front();
509515
}
510516
} else {
511517
param_count = 1;
@@ -1322,8 +1328,8 @@ impl Gpu3DRegisters {
13221328
}
13231329

13241330
fn queue_entry(&mut self, entry: Entry, emu: &mut Emu) {
1325-
if self.cmd_fifo.is_empty() && !self.is_cmd_pipe_full() {
1326-
self.cmd_fifo.push_back(entry);
1331+
if !self.is_cmd_pipe_full() {
1332+
unsafe { self.cmd_fifo.push_back_unchecked(entry) };
13271333
self.cmd_pipe_size += 1;
13281334
self.gx_stat.set_geometry_busy(true);
13291335
} else {
@@ -1332,7 +1338,7 @@ impl Gpu3DRegisters {
13321338
get_cpu_regs_mut!(emu, ARM9).halt(1);
13331339
}
13341340

1335-
self.cmd_fifo.push_back(entry);
1341+
unsafe { self.cmd_fifo.push_back_unchecked(entry) };
13361342
self.gx_stat.set_num_entries_cmd_fifo(u9::new(self.cmd_fifo.len() as u16 - self.cmd_pipe_size as u16));
13371343
self.gx_stat.set_cmd_fifo_empty(false);
13381344

@@ -1387,13 +1393,13 @@ impl Gpu3DRegisters {
13871393
self.queue_entry(Entry::new(self.gx_fifo as u8, value & mask), emu);
13881394
self.cmd_fifo_param_count += 1;
13891395

1390-
if self.cmd_fifo_param_count == unsafe { *FIFO_PARAM_COUNTS.get_unchecked((self.gx_fifo & 0xFF) as usize) } as u32 {
1396+
if self.cmd_fifo_param_count == FIFO_PARAM_COUNTS[(self.gx_fifo & 0x7F) as usize] as u32 {
13911397
self.gx_fifo >>= 8;
13921398
self.cmd_fifo_param_count = 0;
13931399
}
13941400
}
13951401

1396-
while self.gx_fifo != 0 && unsafe { *FIFO_PARAM_COUNTS.get_unchecked((self.gx_fifo & 0xFF) as usize) } == 0 {
1402+
while self.gx_fifo != 0 && FIFO_PARAM_COUNTS[(self.gx_fifo & 0x7F) as usize] == 0 {
13971403
self.queue_entry(Entry::new(self.gx_fifo as u8, value & mask), emu);
13981404
self.gx_fifo >>= 8;
13991405
}

0 commit comments

Comments
 (0)