Skip to content

Commit b80f0dc

Browse files
committed
Implement return stack
1 parent b1337f6 commit b80f0dc

File tree

12 files changed

+292
-102
lines changed

12 files changed

+292
-102
lines changed

src/core/graphics/gpu_3d/registers_3d.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,6 @@ impl Gpu3DRegisters {
500500
let mut param_count = FIFO_PARAM_COUNTS[entry.cmd as usize];
501501
if param_count > 1 {
502502
if param_count as usize > self.cmd_fifo.len() {
503-
refresh_state(self);
504503
break;
505504
}
506505

@@ -567,10 +566,10 @@ impl Gpu3DRegisters {
567566
if self.cmd_pipe_size as usize > self.cmd_fifo.len() {
568567
self.cmd_pipe_size = self.cmd_fifo.len() as u8;
569568
}
570-
571-
refresh_state(self);
572569
}
573570

571+
refresh_state(self);
572+
574573
if !self.is_cmd_fifo_full() {
575574
get_cpu_regs_mut!(emu, ARM9).unhalt(1);
576575
}

src/jit/assembler/arm/branch_assembler.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ impl B {
2121
u4::new(cond as u8),
2222
))
2323
}
24+
25+
/// Builds a `B` instruction word and returns it as a raw `u32`.
///
/// The fields handed to `B::new` are, in order: the low 24 bits of `imm`,
/// a 1-bit field set to 1, the 3-bit constant `0b101`, and `cond` narrowed
/// to 4 bits.
// NOTE(review): presumably this is the ARM "branch with link" encoding and the
// 1-bit field is the link flag — confirm against the `B` bitfield definition.
pub fn bl(imm: i32, cond: Cond) -> u32 {
26+
u32::from(B::new(
27+
// Keep only the low 24 bits of `imm`. The `<< 8` / `>> 8` pair sign-extends
// from bit 23 first, so bit 23 is what ends up as the top bit of the field.
28+
u24::new((((imm << 8) >> 8) & 0xFFFFFF) as u32),
29+
u1::new(1),
30+
u3::new(0b101),
31+
u4::new(cond as u8),
32+
))
33+
}
2434
}
2535

2636
#[bitsize(32)]

src/jit/assembler/block_asm.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,18 +249,23 @@ impl<'a> BlockAsm<'a> {
249249
/// Byte-sized read: forwards to `transfer_read` with `MemoryAmount::Byte`
/// and `false` for the flag argument.
// NOTE(review): call sites of the sibling accessors pass (value register,
// base address register, offset) — presumably the same operand order applies
// here, and `false` presumably means "unsigned"; confirm in `transfer_read`.
pub fn load_u8(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
250250
self.transfer_read(op0, op1, op2, false, MemoryAmount::Byte)
251251
}
252+
252253
/// Byte-sized write: forwards to `transfer_write` with `MemoryAmount::Byte`
/// and `false` for the flag argument.
// NOTE(review): operands are presumably (value register, base address
// register, offset), matching how `store_u16` is called elsewhere — confirm.
pub fn store_u8(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
253254
self.transfer_write(op0, op1, op2, false, MemoryAmount::Byte)
254255
}
256+
255257
/// Half-word read: forwards to `transfer_read` with `MemoryAmount::Half`
/// and `false` for the flag argument.
///
/// Callers pass the destination register as `op0`, the base address register
/// as `op1` and the offset as `op2`.
pub fn load_u16(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
256258
self.transfer_read(op0, op1, op2, false, MemoryAmount::Half)
257259
}
260+
258261
/// Half-word write: forwards to `transfer_write` with `MemoryAmount::Half`
/// and `false` for the flag argument.
///
/// Callers pass the register holding the value as `op0`, the base address
/// register as `op1` and the offset as `op2`.
pub fn store_u16(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
259262
self.transfer_write(op0, op1, op2, false, MemoryAmount::Half)
260263
}
264+
261265
/// Word read: forwards to `transfer_read` with `MemoryAmount::Word`
/// and `false` for the flag argument.
///
/// Callers pass the destination register as `op0`, the base address register
/// as `op1` and the offset as `op2`.
pub fn load_u32(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
262266
self.transfer_read(op0, op1, op2, false, MemoryAmount::Word)
263267
}
268+
264269
/// Word write: forwards to `transfer_write` with `MemoryAmount::Word`
/// and `false` for the flag argument.
// NOTE(review): operands are presumably (value register, base address
// register, offset), matching how `store_u16` is called elsewhere — confirm.
pub fn store_u32(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
265270
self.transfer_write(op0, op1, op2, false, MemoryAmount::Word)
266271
}
@@ -401,6 +406,11 @@ impl<'a> BlockAsm<'a> {
401406
self.buf.insts.push(BlockInst::Epilogue);
402407
}
403408

409+
/// Emits an SP adjustment followed by an epilogue instruction.
///
/// First adds `ANY_REG_LIMIT * 4` bytes to the fixed `SP` register, then
/// pushes a `BlockInst::Epilogue` onto the instruction buffer.
// NOTE(review): presumably the SP bump discards this block's own stack area so
// that the epilogue unwinds the frame of the block that called into it —
// confirm against the prologue's stack layout.
pub fn epilogue_previous_block(&mut self) {
410+
self.add(BlockReg::Fixed(Reg::SP), BlockReg::Fixed(Reg::SP), ANY_REG_LIMIT as u32 * 4);
411+
self.buf.insts.push(BlockInst::Epilogue);
412+
}
413+
404414
/// Emits a call to `func` with no arguments.
///
/// Forwards to `call_internal` with all four optional argument slots set to
/// `None` and `true` as the final flag.
// NOTE(review): the trailing `true` is presumably the "has return" flag —
// confirm against `call_internal`.
pub fn call(&mut self, func: impl Into<BlockOperand>) {
405415
self.call_internal(func, None::<BlockOperand>, None::<BlockOperand>, None::<BlockOperand>, None::<BlockOperand>, true)
406416
}

src/jit/assembler/block_inst.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ impl BlockInst {
164164
(block_reg_set!(Some(*thread_regs_addr_reg)), outputs)
165165
}
166166

167-
BlockInst::Call { func_reg, args, .. } => {
167+
BlockInst::Call { func_reg, args, has_return } => {
168168
let mut inputs = BlockRegSet::new();
169169
inputs += *func_reg;
170170
for arg in args {
@@ -180,7 +180,8 @@ impl BlockInst {
180180
Some(BlockReg::Fixed(Reg::R2)),
181181
Some(BlockReg::Fixed(Reg::R3)),
182182
Some(BlockReg::Fixed(Reg::R12)),
183-
Some(BlockReg::Fixed(Reg::CPSR))
183+
Some(BlockReg::Fixed(Reg::CPSR)),
184+
if *has_return { Some(BlockReg::Fixed(Reg::LR)) } else { None }
184185
),
185186
)
186187
}

src/jit/disassembler/thumb/branch_instructions_thumb.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ mod branch_thumb_ops {
1313
#[inline]
1414
/// Builds the `InstInfoThumb` for `op` from a 16-bit `opcode`.
///
/// The single operand is the register whose index is held in opcode bits 3..=6;
/// that register is also the only reserved source register.
// NOTE(review): presumably this decodes Thumb `BLX <Rm>` — confirm against the
// opcode lookup table that dispatches here.
pub fn blx_reg_t(opcode: u16, op: Op) -> InstInfoThumb {
1515
let op0 = Reg::from(((opcode >> 3) & 0xF) as u8);
16-
InstInfoThumb::new(opcode, op, Operands::new_1(Operand::reg(op0)), reg_reserve!(op0), reg_reserve!(Reg::CPSR), 1)
16+
InstInfoThumb::new(opcode, op, Operands::new_1(Operand::reg(op0)), reg_reserve!(op0), reg_reserve!(Reg::LR, Reg::CPSR), 1)
1717
}
1818

1919
#[inline]

src/jit/emitter/emit.rs

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::core::CpuType;
22
use crate::core::CpuType::ARM7;
33
use crate::jit::assembler::block_asm::BlockAsm;
4-
use crate::jit::assembler::BlockReg;
4+
use crate::jit::assembler::{BlockLabel, BlockReg};
55
use crate::jit::inst_threag_regs_handler::{register_restore_spsr, restore_thumb_after_restore_spsr, set_pc_arm_mode};
66
use crate::jit::jit_asm::{JitAsm, JitRuntimeData};
77
use crate::jit::op::Op;
@@ -60,6 +60,16 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
6060
block_asm.call(restore_thumb_after_restore_spsr::<CPU> as *const ());
6161
}
6262

63+
if (op.is_mov() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::LR) && !self.jit_buf.current_inst().out_regs.is_reserved(Reg::CPSR))
64+
|| (op.is_multiple_mem_transfer() && *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() == Reg::SP)
65+
|| (op.is_single_mem_transfer() && self.jit_buf.current_inst().src_regs.is_reserved(Reg::SP))
66+
{
67+
let guest_pc_reg = block_asm.new_reg();
68+
block_asm.load_u32(guest_pc_reg, block_asm.thread_regs_addr_reg, Reg::PC as u32 * 4);
69+
self.emit_branch_return_stack_common(block_asm, guest_pc_reg);
70+
block_asm.free_reg(guest_pc_reg);
71+
}
72+
6373
self.emit_branch_out_metadata(block_asm);
6474
block_asm.epilogue();
6575
}
@@ -73,7 +83,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
7383

7484
let accumulated_cycles_reg = block_asm.new_reg();
7585
block_asm.load_u16(accumulated_cycles_reg, runtime_data_addr_reg, JitRuntimeData::get_accumulated_cycles_offset() as u32);
76-
86+
7787
// +2 for branching
7888
block_asm.add(
7989
result_accumulated_cycles_reg,
@@ -127,10 +137,11 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
127137
self._emit_branch_out_metadata(block_asm, true, true)
128138
}
129139

130-
pub fn emit_flush_cycles<ContinueFn: Fn(&mut Self, &mut BlockAsm, BlockReg), BreakoutFn: Fn(&mut Self, &mut BlockAsm)>(
140+
pub fn emit_flush_cycles<ContinueFn: Fn(&mut Self, &mut BlockAsm, BlockReg, BlockLabel), BreakoutFn: Fn(&mut Self, &mut BlockAsm)>(
131141
&mut self,
132142
block_asm: &mut BlockAsm,
133-
target_pre_cycle_count_sum: u16,
143+
target_pre_cycle_count_sum: Option<u16>,
144+
add_continue_label: bool,
134145
continue_fn: ContinueFn,
135146
breakout_fn: BreakoutFn,
136147
) {
@@ -140,7 +151,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
140151
let result_accumulated_cycles_reg = block_asm.new_reg();
141152
self.emit_count_cycles(block_asm, runtime_data_addr_reg, result_accumulated_cycles_reg);
142153

143-
const MAX_LOOP_CYCLE_COUNT: u32 = 255;
154+
const MAX_LOOP_CYCLE_COUNT: u32 = 127;
144155
block_asm.cmp(
145156
result_accumulated_cycles_reg,
146157
match CPU {
@@ -149,18 +160,28 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
149160
},
150161
);
151162

163+
let continue_label = if add_continue_label { Some(block_asm.new_label()) } else { None };
152164
let breakout_label = block_asm.new_label();
153165
block_asm.branch(breakout_label, Cond::HS);
154166

155-
let target_pre_cycle_count_sum_reg = block_asm.new_reg();
156-
block_asm.mov(target_pre_cycle_count_sum_reg, target_pre_cycle_count_sum as u32);
157-
block_asm.store_u16(target_pre_cycle_count_sum_reg, runtime_data_addr_reg, JitRuntimeData::get_pre_cycle_count_sum_offset() as u32);
158-
continue_fn(self, block_asm, runtime_data_addr_reg);
167+
if let Some(target_pre_cycle_count_sum) = target_pre_cycle_count_sum {
168+
let target_pre_cycle_count_sum_reg = block_asm.new_reg();
169+
block_asm.mov(target_pre_cycle_count_sum_reg, target_pre_cycle_count_sum as u32);
170+
block_asm.store_u16(target_pre_cycle_count_sum_reg, runtime_data_addr_reg, JitRuntimeData::get_pre_cycle_count_sum_offset() as u32);
171+
block_asm.free_reg(target_pre_cycle_count_sum_reg);
172+
}
173+
continue_fn(self, block_asm, runtime_data_addr_reg, breakout_label);
174+
if add_continue_label {
175+
block_asm.branch(continue_label.unwrap(), Cond::AL);
176+
}
159177

160178
block_asm.label(breakout_label);
161179
breakout_fn(self, block_asm);
162180

163-
block_asm.free_reg(target_pre_cycle_count_sum_reg);
181+
if add_continue_label {
182+
block_asm.label(continue_label.unwrap());
183+
}
184+
164185
block_asm.free_reg(result_accumulated_cycles_reg);
165186
block_asm.free_reg(runtime_data_addr_reg);
166187
}

0 commit comments

Comments
 (0)