Skip to content

Commit c2bfad0

Browse files
committed
Use link list for scheduler
* Some clean ups in rendering
1 parent 8ef2eb2 commit c2bfad0

File tree

9 files changed

+421
-325
lines changed

9 files changed

+421
-325
lines changed

src/core/cycle_manager.rs

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,44 @@ use crate::core::memory::dma::Dma;
77
use crate::core::spu::Spu;
88
use crate::core::timers::Timers;
99
use crate::core::CpuType::{ARM7, ARM9};
10-
use std::collections::VecDeque;
10+
use crate::linked_list::{LinkedList, LinkedListAllocator, LinkedListEntry};
11+
use std::alloc::{GlobalAlloc, Layout, System};
1112
use std::intrinsics::unlikely;
13+
use std::ptr;
14+
15+
struct CycleEventEntry {
16+
cycle_count: u64,
17+
event_type: EventType,
18+
}
19+
20+
impl CycleEventEntry {
21+
fn new(cycle_count: u64, event_type: EventType) -> Self {
22+
CycleEventEntry { cycle_count, event_type }
23+
}
24+
}
25+
26+
#[derive(Default)]
27+
struct CycleEventsListAllocator(Vec<*mut LinkedListEntry<CycleEventEntry>>);
28+
29+
impl LinkedListAllocator<CycleEventEntry> for CycleEventsListAllocator {
30+
fn allocate(&mut self, value: CycleEventEntry) -> *mut LinkedListEntry<CycleEventEntry> {
31+
let entry = if self.0.is_empty() {
32+
unsafe { System.alloc(Layout::new::<LinkedListEntry<CycleEventEntry>>()) as *mut LinkedListEntry<CycleEventEntry> }
33+
} else {
34+
unsafe { self.0.pop().unwrap_unchecked() }
35+
};
36+
unsafe {
37+
(*entry).value = value;
38+
(*entry).previous = ptr::null_mut();
39+
(*entry).next = ptr::null_mut();
40+
}
41+
entry
42+
}
43+
44+
fn deallocate(&mut self, entry: *mut LinkedListEntry<CycleEventEntry>) {
45+
self.0.push(entry);
46+
}
47+
}
1248

1349
#[derive(Debug)]
1450
pub enum EventType {
@@ -29,14 +65,14 @@ pub enum EventType {
2965

3066
pub struct CycleManager {
3167
cycle_count: u64,
32-
events: VecDeque<(u64, EventType)>,
68+
events: LinkedList<CycleEventEntry, CycleEventsListAllocator>,
3369
}
3470

3571
impl CycleManager {
3672
pub fn new() -> Self {
3773
CycleManager {
3874
cycle_count: 0,
39-
events: VecDeque::new(),
75+
events: LinkedList::new(),
4076
}
4177
}
4278

@@ -53,25 +89,25 @@ impl CycleManager {
5389
let cycle_count = self.cycle_count;
5490
let mut event_triggered = false;
5591
while {
56-
let (cycles, _) = unsafe { self.events.front().unwrap_unchecked() };
57-
unlikely(*cycles <= cycle_count)
92+
let entry = &LinkedList::<_, CycleEventsListAllocator>::deref(self.events.root).value;
93+
unlikely(entry.cycle_count <= cycle_count)
5894
} {
5995
event_triggered = true;
60-
let (cycles, event) = unsafe { self.events.pop_front().unwrap_unchecked() };
61-
match event {
96+
let entry = self.events.remove_begin();
97+
match entry.event_type {
6298
EventType::CpuInterruptArm9 => CpuRegs::on_interrupt_event::<{ ARM9 }>(emu),
6399
EventType::CpuInterruptArm7 => CpuRegs::on_interrupt_event::<{ ARM7 }>(emu),
64-
EventType::GpuScanline256 => Gpu::on_scanline256_event(emu),
65-
EventType::GpuScanline355 => Gpu::on_scanline355_event(emu),
66-
EventType::SoundCmdHle => SoundNitro::on_cmd_event(emu),
67-
EventType::SoundAlarmHle(id) => SoundNitro::on_alarm_event(id, emu),
100+
EventType::GpuScanline256 => Gpu::on_scanline256_event(self, emu),
101+
EventType::GpuScanline355 => Gpu::on_scanline355_event(self, emu),
102+
EventType::SoundCmdHle => SoundNitro::on_cmd_event(self, emu),
103+
EventType::SoundAlarmHle(id) => SoundNitro::on_alarm_event(id, self, emu),
68104
EventType::CartridgeWordReadArm9 => Cartridge::on_word_read_event::<{ ARM9 }>(emu),
69105
EventType::CartridgeWordReadArm7 => Cartridge::on_word_read_event::<{ ARM7 }>(emu),
70106
EventType::DmaArm9(channel) => Dma::on_event::<{ ARM9 }>(channel, emu),
71107
EventType::DmaArm7(channel) => Dma::on_event::<{ ARM7 }>(channel, emu),
72108
EventType::SpuSample => Spu::on_sample_event(emu),
73-
EventType::TimerArm9(channel) => Timers::on_overflow_event::<{ ARM9 }>(cycles, channel, emu),
74-
EventType::TimerArm7(channel) => Timers::on_overflow_event::<{ ARM7 }>(cycles, channel, emu),
109+
EventType::TimerArm9(channel) => Timers::on_overflow_event::<{ ARM9 }>(entry.cycle_count, channel, emu),
110+
EventType::TimerArm7(channel) => Timers::on_overflow_event::<{ ARM7 }>(entry.cycle_count, channel, emu),
75111
}
76112
}
77113
event_triggered
@@ -80,19 +116,21 @@ impl CycleManager {
80116
pub fn schedule(&mut self, in_cycles: u32, event_type: EventType) -> u64 {
81117
debug_assert_ne!(in_cycles, 0);
82118
let event_cycle = self.cycle_count + in_cycles as u64;
83-
let mut index = self.events.len();
84-
for i in 0..self.events.len() {
85-
let (cycles, _) = unsafe { self.events.get(i).unwrap_unchecked() };
86-
if *cycles > event_cycle {
87-
index = i;
88-
break;
119+
120+
let mut current_node = self.events.root;
121+
while !current_node.is_null() {
122+
let entry = LinkedList::<_, CycleEventsListAllocator>::deref(current_node);
123+
if entry.value.cycle_count > event_cycle {
124+
self.events.insert_entry_begin(current_node, CycleEventEntry::new(event_cycle, event_type));
125+
return event_cycle;
89126
}
127+
current_node = entry.next;
90128
}
91-
self.events.insert(index, (event_cycle, event_type));
129+
self.events.insert_end(CycleEventEntry::new(event_cycle, event_type));
92130
event_cycle
93131
}
94132

95133
pub fn jump_to_next_event(&mut self) {
96-
self.cycle_count = unsafe { self.events.front().unwrap_unchecked().0 };
134+
self.cycle_count = LinkedList::<_, CycleEventsListAllocator>::deref(self.events.root).value.cycle_count
97135
}
98136
}

src/core/graphics/gpu.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::core::cpu_regs::InterruptFlag;
22
use crate::core::cycle_manager::{CycleManager, EventType};
3-
use crate::core::emu::{get_arm7_hle_mut, get_cm_mut, get_common_mut, get_cpu_regs_mut, get_mem_mut, io_dma, Emu};
3+
use crate::core::emu::{get_arm7_hle_mut, get_common_mut, get_cpu_regs_mut, get_mem_mut, io_dma, Emu};
44
use crate::core::graphics::gpu_2d::registers_2d::Gpu2DRegisters;
55
use crate::core::graphics::gpu_2d::Gpu2DEngine::{A, B};
66
use crate::core::graphics::gpu_3d::registers_3d::Gpu3DRegisters;
@@ -141,7 +141,8 @@ impl Gpu {
141141
self.disp_cap_cnt = (self.disp_cap_cnt & !mask) | (value & mask);
142142
}
143143

144-
pub fn on_scanline256_event(emu: &mut Emu) {
144+
#[inline(never)]
145+
pub fn on_scanline256_event(cm: &mut CycleManager, emu: &mut Emu) {
145146
let gpu = &mut get_common_mut!(emu).gpu;
146147

147148
if gpu.v_count < 192 {
@@ -151,7 +152,7 @@ impl Gpu {
151152
.as_mut()
152153
.on_scanline(&mut gpu.gpu_2d_regs_a, &mut gpu.gpu_2d_regs_b, gpu.v_count as u8)
153154
}
154-
io_dma!(emu, ARM9).trigger_all(DmaTransferMode::StartAtHBlank, get_cm_mut!(emu));
155+
io_dma!(emu, ARM9).trigger_all(DmaTransferMode::StartAtHBlank, cm);
155156
}
156157

157158
for i in 0..2 {
@@ -162,10 +163,11 @@ impl Gpu {
162163
}
163164
}
164165

165-
get_cm_mut!(emu).schedule((355 - 256) * 6, EventType::GpuScanline355);
166+
cm.schedule((355 - 256) * 6, EventType::GpuScanline355);
166167
}
167168

168-
pub fn on_scanline355_event(emu: &mut Emu) {
169+
#[inline(never)]
170+
pub fn on_scanline355_event(cm: &mut CycleManager, emu: &mut Emu) {
169171
let gpu = &mut get_common_mut!(emu).gpu;
170172

171173
gpu.v_count += 1;
@@ -186,7 +188,7 @@ impl Gpu {
186188
disp_stat.set_v_blank_flag(u1::new(1));
187189
if disp_stat.v_blank_irq_enable() {
188190
get_cpu_regs_mut!(emu, CpuType::from(i as u8)).send_interrupt(InterruptFlag::LcdVBlank, emu);
189-
io_dma!(emu, CpuType::from(i as u8)).trigger_all(DmaTransferMode::StartAtVBlank, get_cm_mut!(emu));
191+
io_dma!(emu, CpuType::from(i as u8)).trigger_all(DmaTransferMode::StartAtVBlank, cm);
190192
}
191193
}
192194
}
@@ -225,6 +227,6 @@ impl Gpu {
225227
get_arm7_hle_mut!(emu).on_scanline(gpu.v_count, emu);
226228
}
227229

228-
get_cm_mut!(emu).schedule(256 * 6, EventType::GpuScanline256);
230+
cm.schedule(256 * 6, EventType::GpuScanline256);
229231
}
230232
}

src/core/graphics/gpu_3d/registers_3d.rs

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::fixed_fifo::FixedFifo;
77
use crate::math::{Matrix, Vectorf32, Vectori16, Vectori32, Vectoru16};
88
use crate::utils::{rgb5_to_rgb6, HeapMem};
99
use bilge::prelude::*;
10+
use std::arch::arm::{vcvtq_f32_s32, vld1q_s32, vmulq_n_f32, vst1q_f32};
1011
use std::hint::unreachable_unchecked;
1112
use std::intrinsics::unlikely;
1213
use std::mem;
@@ -241,24 +242,39 @@ fn intersect(v1: &Vectorf32<4>, v2: &Vectorf32<4>, val1: f32, val2: f32) -> Vect
241242
return *v1;
242243
}
243244

244-
let mut vertex = Vectorf32::default();
245-
let dist_inverse = -d1 / (d2 - d1);
246-
vertex[0] = v1[0] + ((v2[0] - v1[0]) * dist_inverse);
247-
vertex[1] = v1[1] + ((v2[1] - v1[1]) * dist_inverse);
248-
vertex[2] = v1[2] + ((v2[2] - v1[2]) * dist_inverse);
249-
vertex[3] = v1[3] + ((v2[3] - v1[3]) * dist_inverse);
245+
let mut vertex: Vectorf32<4> = unsafe { MaybeUninit::uninit().assume_init() };
246+
let dist_inverse = -d1 as f64 / (d2 - d1) as f64;
247+
vertex[0] = v1[0] + (((v2[0] - v1[0]) as f64 * dist_inverse) as f32);
248+
vertex[1] = v1[1] + (((v2[1] - v1[1]) as f64 * dist_inverse) as f32);
249+
vertex[2] = v1[2] + (((v2[2] - v1[2]) as f64 * dist_inverse) as f32);
250+
vertex[3] = v1[3] + (((v2[3] - v1[3]) as f64 * dist_inverse) as f32);
250251
vertex
251252
}
252253

253254
fn clip_polygon(unclipped: &[Vectori32<4>; 4], clipped: &mut [Vectorf32<4>; 10], size: &mut usize) -> bool {
254255
let mut clip = false;
255256

256257
let mut vertices = [Vectorf32::<4>::default(); 10];
257-
for i in 0..4 {
258-
for j in 0..4 {
259-
const NORMALIZE: f32 = 1f32 / 4096f32;
260-
vertices[i][j] = unclipped[i][j] as f32 * NORMALIZE;
261-
}
258+
unsafe {
259+
let vertices0 = vld1q_s32(unclipped[0].as_ref().as_ptr());
260+
let vertices1 = vld1q_s32(unclipped[1].as_ref().as_ptr());
261+
let vertices2 = vld1q_s32(unclipped[2].as_ref().as_ptr());
262+
let vertices3 = vld1q_s32(unclipped[3].as_ref().as_ptr());
263+
264+
let vertices0 = vcvtq_f32_s32(vertices0);
265+
let vertices1 = vcvtq_f32_s32(vertices1);
266+
let vertices2 = vcvtq_f32_s32(vertices2);
267+
let vertices3 = vcvtq_f32_s32(vertices3);
268+
269+
let vertices0 = vmulq_n_f32(vertices0, 1f32 / 4096f32);
270+
let vertices1 = vmulq_n_f32(vertices1, 1f32 / 4096f32);
271+
let vertices2 = vmulq_n_f32(vertices2, 1f32 / 4096f32);
272+
let vertices3 = vmulq_n_f32(vertices3, 1f32 / 4096f32);
273+
274+
vst1q_f32(vertices[0].as_mut().as_mut_ptr(), vertices0);
275+
vst1q_f32(vertices[1].as_mut().as_mut_ptr(), vertices1);
276+
vst1q_f32(vertices[2].as_mut().as_mut_ptr(), vertices2);
277+
vst1q_f32(vertices[3].as_mut().as_mut_ptr(), vertices3);
262278
}
263279

264280
for i in 0..6 {
@@ -269,15 +285,11 @@ fn clip_polygon(unclipped: &[Vectori32<4>; 4], clipped: &mut [Vectorf32<4>; 10],
269285
let current = unsafe { vertices.get_unchecked(j) };
270286
let previous = unsafe { vertices.get_unchecked(if unlikely(j == 0) { old_size - 1 } else { j - 1 }) };
271287

272-
let (current_val, previous_val) = match i {
273-
0 => (current[0], previous[0]),
274-
1 => (-current[0], -previous[0]),
275-
2 => (current[1], previous[1]),
276-
3 => (-current[1], -previous[1]),
277-
4 => (current[2], previous[2]),
278-
5 => (-current[2], -previous[2]),
279-
_ => unsafe { unreachable_unchecked() },
280-
};
288+
let (mut current_val, mut previous_val) = (current[i >> 1], previous[i >> 1]);
289+
if i & 1 == 1 {
290+
current_val = -current_val;
291+
previous_val = -previous_val;
292+
}
281293

282294
if current_val >= -current[3] {
283295
if previous_val < -previous[3] {
@@ -1218,12 +1230,14 @@ impl Gpu3DRegisters {
12181230

12191231
let mut unclipped = [Vectori32::<4>::default(); 4];
12201232
for i in 0..size {
1221-
unclipped[i] = self.vertices.ins[self.saved_polygon.vertices_index + i].coords;
1233+
unsafe { *unclipped.get_unchecked_mut(i) = self.vertices.ins.get_unchecked(self.saved_polygon.vertices_index + i).coords };
12221234
}
12231235

12241236
if self.polygon_type == PolygonType::QuadliteralStrips {
1225-
unclipped.swap(2, 3);
1226-
self.vertices.ins.swap(self.saved_polygon.vertices_index + 2, self.saved_polygon.vertices_index + 3);
1237+
unsafe {
1238+
unclipped.swap_unchecked(2, 3);
1239+
self.vertices.ins.swap_unchecked(self.saved_polygon.vertices_index + 2, self.saved_polygon.vertices_index + 3);
1240+
}
12271241
}
12281242

12291243
let x1 = (unclipped[1][0] - unclipped[0][0]) as i64;
@@ -1265,23 +1279,31 @@ impl Gpu3DRegisters {
12651279
self.vertices.count_in -= size;
12661280
}
12671281
PolygonType::TriangleStrips => {
1282+
let Vertices { ins, count_in, .. } = &mut self.vertices;
12681283
if self.vertex_count == 3 {
1269-
self.vertices.ins[self.vertices.count_in - 3] = self.vertices.ins[self.vertices.count_in - 2];
1270-
self.vertices.ins[self.vertices.count_in - 2] = self.vertices.ins[self.vertices.count_in - 1];
1271-
self.vertices.count_in -= 1;
1284+
unsafe {
1285+
*ins.get_unchecked_mut(*count_in - 3) = *ins.get_unchecked(*count_in - 2);
1286+
*ins.get_unchecked_mut(*count_in - 2) = *ins.get_unchecked(*count_in - 1);
1287+
}
1288+
*count_in -= 1;
12721289
self.vertex_count -= 1;
1273-
} else if self.vertices.count_in < 6144 {
1274-
self.vertices.ins[self.vertices.count_in] = self.vertices.ins[self.vertices.count_in - 1];
1275-
self.vertices.ins[self.vertices.count_in - 1] = self.vertices.ins[self.vertices.count_in - 2];
1276-
self.vertices.count_in += 1;
1290+
} else if *count_in < 6144 {
1291+
unsafe {
1292+
*ins.get_unchecked_mut(*count_in) = *ins.get_unchecked(*count_in - 1);
1293+
*ins.get_unchecked_mut(*count_in - 1) = *ins.get_unchecked(*count_in - 2);
1294+
}
1295+
*count_in += 1;
12771296
self.vertex_count = 2;
12781297
}
12791298
}
12801299
PolygonType::QuadliteralStrips => {
12811300
if self.vertex_count == 4 {
1282-
self.vertices.ins[self.vertices.count_in - 4] = self.vertices.ins[self.vertices.count_in - 2];
1283-
self.vertices.ins[self.vertices.count_in - 3] = self.vertices.ins[self.vertices.count_in - 1];
1284-
self.vertices.count_in -= 2;
1301+
let Vertices { ins, count_in, .. } = &mut self.vertices;
1302+
unsafe {
1303+
*ins.get_unchecked_mut(*count_in - 4) = *ins.get_unchecked(*count_in - 2);
1304+
*ins.get_unchecked_mut(*count_in - 3) = *ins.get_unchecked(*count_in - 1);
1305+
}
1306+
*count_in -= 2;
12851307
self.vertex_count -= 2;
12861308
} else {
12871309
self.vertex_count = 2;

src/core/hle/sound_nitro.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::core::cycle_manager::EventType;
1+
use crate::core::cycle_manager::{CycleManager, EventType};
22
use crate::core::emu::{get_arm7_hle_mut, get_cm_mut, get_spu, get_spu_mut, Emu};
33
use crate::core::hle::arm7_hle::Arm7Hle;
44
use crate::core::hle::bios::{PITCH_TABLE, VOLUME_TABLE};
@@ -223,7 +223,7 @@ impl SoundNitro {
223223
get_cm_mut!(emu).schedule(174592, EventType::SoundCmdHle);
224224
}
225225

226-
fn on_alarm(&mut self, alarm_id: usize, emu: &mut Emu) {
226+
fn on_alarm(&mut self, alarm_id: usize, cm: &mut CycleManager, emu: &mut Emu) {
227227
let alarm = &mut self.alarms[alarm_id];
228228
if !alarm.active {
229229
return;
@@ -233,7 +233,7 @@ impl SoundNitro {
233233

234234
let delay = alarm.repeat;
235235
if delay != 0 {
236-
get_cm_mut!(emu).schedule(delay * 64, EventType::SoundAlarmHle(alarm_id as u8));
236+
cm.schedule(delay * 64, EventType::SoundAlarmHle(alarm_id as u8));
237237
} else {
238238
alarm.active = false;
239239
}
@@ -2164,9 +2164,9 @@ impl SoundNitro {
21642164
(self.counter >> 16) as u16
21652165
}
21662166

2167-
fn process(&mut self, param: u32, emu: &mut Emu) {
2167+
fn process(&mut self, cm: &mut CycleManager, param: u32, emu: &mut Emu) {
21682168
if param != 0 {
2169-
get_cm_mut!(emu).schedule(174592, EventType::SoundCmdHle);
2169+
cm.schedule(174592, EventType::SoundCmdHle);
21702170
}
21712171

21722172
self.update_hardware_channels(emu);
@@ -2179,17 +2179,19 @@ impl SoundNitro {
21792179

21802180
pub(super) fn ipc_recv(&mut self, data: u32, emu: &mut Emu) {
21812181
if data == 0 {
2182-
self.process(0, emu);
2182+
self.process(get_cm_mut!(emu), 0, emu);
21832183
} else if data >= 0x02000000 {
21842184
self.cmd_queue.push_back(data);
21852185
}
21862186
}
21872187

2188-
pub fn on_cmd_event(emu: &mut Emu) {
2189-
get_arm7_hle_mut!(emu).sound.nitro.process(1, emu);
2188+
#[inline(never)]
2189+
pub fn on_cmd_event(cm: &mut CycleManager, emu: &mut Emu) {
2190+
get_arm7_hle_mut!(emu).sound.nitro.process(cm, 1, emu);
21902191
}
21912192

2192-
pub fn on_alarm_event(id: u8, emu: &mut Emu) {
2193-
get_arm7_hle_mut!(emu).sound.nitro.on_alarm(id as usize, emu);
2193+
#[inline(never)]
2194+
pub fn on_alarm_event(id: u8, cm: &mut CycleManager, emu: &mut Emu) {
2195+
get_arm7_hle_mut!(emu).sound.nitro.on_alarm(id as usize, cm, emu);
21942196
}
21952197
}

src/core/memory/dma.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ impl Dma {
302302
}
303303
}
304304

305+
#[inline(never)]
305306
pub fn on_event<const CPU: CpuType>(channel_num: u8, emu: &mut Emu) {
306307
let channel_num = channel_num as usize;
307308
unsafe { assert_unchecked(channel_num < CHANNEL_COUNT) };

0 commit comments

Comments
 (0)