Skip to content

Commit fde8405

Browse files
committed
Implement fastmem for multiple reads
1 parent cf08d39 commit fde8405

File tree

11 files changed

+227
-43
lines changed

11 files changed

+227
-43
lines changed

src/jit/assembler/arm/transfer_assembler.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ pub struct LdmStm {
308308
impl LdmStm {
309309
#[inline]
310310
pub fn generic(op0: Reg, regs: RegReserve, read: bool, write_back: bool, add_to_base: bool, pre: bool, cond: Cond) -> u32 {
311+
debug_assert!(!write_back || !regs.is_reserved(op0));
311312
u32::from(LdmStm::new(
312313
regs.0 as u16,
313314
u4::new(op0 as u8),
@@ -323,6 +324,7 @@ impl LdmStm {
323324

324325
/// Encodes a store-multiple instruction word that uses `sp` as the base
/// register, `regs` as the register list and `cond` as the condition code.
///
/// In debug builds this asserts that `sp` is not itself part of `regs`.
///
/// NOTE(review): the five boolean arguments passed to `LdmStm::new` are
/// presumably the load / write-back / S / up / pre bits in ARM LDM/STM bit
/// order (here: store, write-back, no S, decrement, post-index) — confirm
/// against the `LdmStm` bitfield definition, which is not visible in this hunk.
#[inline]
325326
pub fn push_post(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
327+
// A base register that is also in the transfer list is rejected up front.
debug_assert!(!regs.is_reserved(sp));
326328
u32::from(LdmStm::new(regs.0 as u16, u4::new(sp as u8), false, true, false, false, false, u3::new(0b100), u4::new(cond as u8)))
327329
}
328330

@@ -333,11 +335,13 @@ impl LdmStm {
333335

334336
/// Encodes a store-multiple instruction word that uses `sp` as the base
/// register, `regs` as the register list and `cond` as the condition code.
/// Differs from `push_post` only in the last boolean passed to `LdmStm::new`.
///
/// In debug builds this asserts that `sp` is not itself part of `regs`.
///
/// NOTE(review): the five boolean arguments are presumably the load /
/// write-back / S / up / pre bits (here: store, write-back, no S, decrement,
/// pre-index) — confirm against the `LdmStm` bitfield definition.
#[inline]
335337
pub fn push_pre(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
338+
// A base register that is also in the transfer list is rejected up front.
debug_assert!(!regs.is_reserved(sp));
336339
u32::from(LdmStm::new(regs.0 as u16, u4::new(sp as u8), false, true, false, false, true, u3::new(0b100), u4::new(cond as u8)))
337340
}
338341

339342
/// Encodes a load-multiple instruction word that uses `sp` as the base
/// register, `regs` as the register list and `cond` as the condition code.
///
/// In debug builds this asserts that `sp` is not itself part of `regs`.
///
/// NOTE(review): the five boolean arguments passed to `LdmStm::new` are
/// presumably the load / write-back / S / up / pre bits in ARM LDM/STM bit
/// order (here: load, write-back, no S, increment, post-index) — confirm
/// against the `LdmStm` bitfield definition, which is not visible in this hunk.
#[inline]
340343
pub fn pop_post(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
344+
// A base register that is also in the transfer list is rejected up front.
debug_assert!(!regs.is_reserved(sp));
341345
u32::from(LdmStm::new(regs.0 as u16, u4::new(sp as u8), true, true, false, true, false, u3::new(0b100), u4::new(cond as u8)))
342346
}
343347

@@ -348,6 +352,7 @@ impl LdmStm {
348352

349353
/// Encodes a load-multiple instruction word that uses `sp` as the base
/// register, `regs` as the register list and `cond` as the condition code.
/// Differs from `pop_post` only in the last boolean passed to `LdmStm::new`.
///
/// In debug builds this asserts that `sp` is not itself part of `regs`.
///
/// NOTE(review): the five boolean arguments are presumably the load /
/// write-back / S / up / pre bits (here: load, write-back, no S, increment,
/// pre-index) — confirm against the `LdmStm` bitfield definition.
#[inline]
350354
pub fn pop_pre(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
355+
// A base register that is also in the transfer list is rejected up front.
debug_assert!(!regs.is_reserved(sp));
351356
u32::from(LdmStm::new(regs.0 as u16, u4::new(sp as u8), true, true, false, true, true, u3::new(0b100), u4::new(cond as u8)))
352357
}
353358
}

src/jit/assembler/block_asm.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,28 @@ impl<'a> BlockAsm<'a> {
345345
})
346346
}
347347

348+
/// Appends a `BlockInstKind::GuestTransferMultiple` instruction with
/// `op: BlockTransferOp::Read` to this block.
///
/// * `addr_reg` - register holding the address to read from; converted with `Into<BlockReg>`.
/// * `addr_out_reg` - register stored as the instruction's `addr_out_reg`; converted with `Into<BlockReg>`.
///   NOTE(review): presumably only meaningful when `write_back` is set — confirm against the emitter.
/// * `gp_regs` - register list stored as `gp_regs`.
///   NOTE(review): presumably guest registers loaded directly — confirm against the I/O register computation.
/// * `fixed_regs` - register list stored as `fixed_regs`.
///   NOTE(review): presumably fixed host registers used as load targets — confirm.
/// * `write_back`, `pre`, `add_to_base` - forwarded unchanged into the instruction.
pub fn guest_transfer_read_multiple(
349+
&mut self,
350+
addr_reg: impl Into<BlockReg>,
351+
addr_out_reg: impl Into<BlockReg>,
352+
gp_regs: RegReserve,
353+
fixed_regs: RegReserve,
354+
write_back: bool,
355+
pre: bool,
356+
add_to_base: bool,
357+
) {
358+
self.insert_inst(BlockInstKind::GuestTransferMultiple {
359+
// This helper only ever builds the read (load) form.
op: BlockTransferOp::Read,
360+
addr_reg: addr_reg.into(),
361+
addr_out_reg: addr_out_reg.into(),
362+
gp_regs,
363+
fixed_regs,
364+
write_back,
365+
pre,
366+
add_to_base,
367+
})
368+
}
369+
348370
pub fn mrs_cpsr(&mut self, operand: impl Into<BlockReg>) {
349371
self.insert_inst(BlockInstKind::SystemReg {
350372
op: BlockSystemRegOp::Mrs,

src/jit/assembler/block_inst.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ impl BlockInst {
119119
*self.io_cache.borrow_mut() = None;
120120
self.kind.replace_output_regs(old, new);
121121
}
122+
123+
/// Returns `true` when this instruction's kind is `TransferMultiple` or
/// `GuestTransferMultiple`, and `false` for every other kind.
///
/// NOTE(review): presumably consumed by the register allocator to keep input
/// and output registers of multi-register transfers from sharing a host
/// register — confirm against the caller, which is not visible in this hunk.
pub fn needs_separated_io_regs(&self) -> bool {
124+
matches!(self.kind, BlockInstKind::TransferMultiple { .. } | BlockInstKind::GuestTransferMultiple { .. })
125+
}
122126
}
123127

124128
impl From<BlockInstKind> for BlockInst {
@@ -168,6 +172,16 @@ pub enum BlockInstKind {
168172
pre: bool,
169173
add_to_base: bool,
170174
},
175+
GuestTransferMultiple {
176+
op: BlockTransferOp,
177+
addr_reg: BlockReg,
178+
addr_out_reg: BlockReg,
179+
gp_regs: RegReserve,
180+
fixed_regs: RegReserve,
181+
write_back: bool,
182+
pre: bool,
183+
add_to_base: bool,
184+
},
171185
SystemReg {
172186
op: BlockSystemRegOp,
173187
operand: BlockOperand,
@@ -302,6 +316,26 @@ impl BlockInstKind {
302316
),
303317
BlockTransferOp::Write => (BlockRegSet::new_fixed(*regs) + *operand, if *write_back { block_reg_set!(Some(*operand)) } else { block_reg_set!() }),
304318
},
319+
BlockInstKind::GuestTransferMultiple {
320+
op,
321+
addr_reg,
322+
addr_out_reg,
323+
gp_regs,
324+
fixed_regs,
325+
write_back,
326+
..
327+
} => match op {
328+
BlockTransferOp::Read => {
329+
let mut outputs = BlockRegSet::new_fixed(*fixed_regs);
330+
outputs.add_guests(*gp_regs);
331+
(block_reg_set!(Some(*addr_reg)), if *write_back { outputs + *addr_out_reg } else { outputs })
332+
}
333+
BlockTransferOp::Write => {
334+
let mut inputs = BlockRegSet::new_fixed(*fixed_regs);
335+
inputs.add_guests(*gp_regs);
336+
(inputs + *addr_reg, if *write_back { block_reg_set!(Some(*addr_out_reg)) } else { block_reg_set!() })
337+
}
338+
},
305339
BlockInstKind::SystemReg { op, operand } => match op {
306340
BlockSystemRegOp::Mrs => (block_reg_set!(), block_reg_set!(Some(operand.as_reg()))),
307341
BlockSystemRegOp::Msr => (block_reg_set!(operand.try_as_reg()), block_reg_set!()),
@@ -443,6 +477,7 @@ impl BlockInstKind {
443477
operands[2].replace_regs(old, new);
444478
}
445479
BlockInstKind::TransferMultiple { operand, .. } => Self::replace_reg(operand, old, new),
480+
BlockInstKind::GuestTransferMultiple { addr_reg, .. } => Self::replace_reg(addr_reg, old, new),
446481
BlockInstKind::SystemReg { op, operand } => {
447482
if *op == BlockSystemRegOp::Msr {
448483
Self::replace_operand(operand, old, new);
@@ -501,6 +536,11 @@ impl BlockInstKind {
501536
Self::replace_reg(operand, old, new);
502537
}
503538
}
539+
BlockInstKind::GuestTransferMultiple { addr_out_reg, write_back, .. } => {
540+
if *write_back {
541+
Self::replace_reg(addr_out_reg, old, new);
542+
}
543+
}
504544
BlockInstKind::SystemReg { op, operand } => {
505545
if *op == BlockSystemRegOp::Mrs {
506546
Self::replace_operand(operand, old, new);
@@ -676,6 +716,29 @@ impl BlockInstKind {
676716
pre,
677717
add_to_base,
678718
} => opcodes.push(LdmStm::generic(operand.as_fixed(), *regs, *op == BlockTransferOp::Read, *write_back, *add_to_base, *pre, Cond::AL)),
719+
BlockInstKind::GuestTransferMultiple {
720+
op,
721+
addr_reg,
722+
addr_out_reg,
723+
gp_regs,
724+
fixed_regs,
725+
write_back,
726+
pre,
727+
add_to_base,
728+
} => {
729+
if *write_back && *addr_reg != *addr_out_reg {
730+
opcodes.push(AluShiftImm::mov_al(addr_out_reg.as_fixed(), addr_reg.as_fixed()))
731+
}
732+
opcodes.push(LdmStm::generic(
733+
if *write_back { addr_out_reg.as_fixed() } else { addr_reg.as_fixed() },
734+
*gp_regs + *fixed_regs,
735+
*op == BlockTransferOp::Read,
736+
*write_back,
737+
*add_to_base,
738+
*pre,
739+
Cond::AL,
740+
))
741+
}
679742
BlockInstKind::SystemReg { op, operand } => match op {
680743
BlockSystemRegOp::Mrs => opcodes.push(Mrs::cpsr(operand.as_reg().as_fixed(), Cond::AL)),
681744
BlockSystemRegOp::Msr => opcodes.push(Msr::cpsr_flags(operand.as_reg().as_fixed(), Cond::AL)),
@@ -866,6 +929,22 @@ impl Debug for BlockInstKind {
866929
let add_to_base = if *add_to_base { "+" } else { "-" };
867930
write!(f, "{op:?}M {operand:?} {regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base")
868931
}
932+
BlockInstKind::GuestTransferMultiple {
933+
op,
934+
addr_reg,
935+
addr_out_reg,
936+
gp_regs,
937+
fixed_regs,
938+
write_back,
939+
pre,
940+
add_to_base,
941+
} => {
942+
let add_to_base = if *add_to_base { "+" } else { "-" };
943+
write!(
944+
f,
945+
"{op:?}M {addr_reg:?} -> {addr_out_reg:?} gp regs: {gp_regs:?}, fixed regs: {fixed_regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base"
946+
)
947+
}
869948
BlockInstKind::SystemReg { op, operand } => write!(f, "{op:?} {operand:?}"),
870949
BlockInstKind::Bfc { operand, lsb, width } => write!(f, "Bfc {operand:?}, {lsb}, {width}"),
871950
BlockInstKind::Bfi { operands, lsb, width } => write!(f, "Bfi {:?}, {:?}, {lsb}, {width}", operands[0], operands[1]),

src/jit/assembler/block_reg_allocator.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ impl BlockRegAllocator {
100100

101101
fn allocate_common(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Option<Reg> {
102102
for reg in ALLOCATION_REGS {
103-
if self.stored_mapping_reverse[reg as usize].is_none() {
103+
if self.stored_mapping_reverse[reg as usize].is_none() && !used_regs[0].contains(BlockReg::Fixed(reg)) && !live_ranges[1].contains(BlockReg::Fixed(reg)) {
104104
self.set_stored_mapping(any_reg, reg);
105105
return Some(reg);
106106
}
@@ -257,7 +257,7 @@ impl BlockRegAllocator {
257257
for guest_reg in guest_regs {
258258
if self.stored_mapping[guest_reg as usize] != guest_reg
259259
// Check if reg is used as a fixed input for something else
260-
&& (!SCRATCH_REGS.is_reserved(guest_reg) || !live_ranges[1].contains(BlockReg::Fixed(guest_reg)))
260+
&& !live_ranges[1].contains(BlockReg::Fixed(guest_reg))
261261
{
262262
relocatable_regs += guest_reg;
263263
}
@@ -266,10 +266,15 @@ impl BlockRegAllocator {
266266
let mut spilled_regs = Vec::new();
267267
for guest_reg in relocatable_regs {
268268
if let Some(currently_used_by) = self.stored_mapping_reverse[guest_reg as usize] {
269-
self.spilled += BlockReg::Any(currently_used_by);
270-
self.gen_pre_handle_spilled_inst(currently_used_by, guest_reg, BlockTransferOp::Write);
269+
if DEBUG && unsafe { BLOCK_LOG } {
270+
println!("relocate guest spill {currently_used_by} for {guest_reg:?}");
271+
}
272+
if inputs.contains(BlockReg::Any(currently_used_by)) || live_ranges[1].contains(BlockReg::Any(currently_used_by)) {
273+
self.spilled += BlockReg::Any(currently_used_by);
274+
self.gen_pre_handle_spilled_inst(currently_used_by, guest_reg, BlockTransferOp::Write);
275+
spilled_regs.push((BlockReg::Any(currently_used_by), guest_reg, self.pre_allocate_insts.len() - 1));
276+
}
271277
self.remove_stored_mapping(currently_used_by);
272-
spilled_regs.push((BlockReg::Any(currently_used_by), guest_reg, self.pre_allocate_insts.len() - 1));
273278
}
274279
}
275280

src/jit/emitter/emit_transfer.rs

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::core::emu::{get_mmu, get_regs};
22
use crate::core::CpuType;
33
use crate::jit::assembler::block_asm::BlockAsm;
4-
use crate::jit::assembler::BlockOperand;
4+
use crate::jit::assembler::{BlockOperand, BlockReg};
55
use crate::jit::inst_info::Operand;
66
use crate::jit::inst_mem_handler::{inst_mem_handler, inst_mem_handler_multiple, inst_mem_handler_swp};
77
use crate::jit::jit_asm::JitAsm;
@@ -188,10 +188,14 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
188188
block_asm.mov(op0, (fast_read_value_reg, ShiftType::Ror, fast_read_addr_masked_reg));
189189
}
190190

191-
block_asm.nop();
192191
block_asm.branch_fallthrough(continue_label, Cond::AL);
193-
194192
block_asm.label(slow_read_label);
193+
194+
block_asm.restore_reg(op0);
195+
if amount == MemoryAmount::Double {
196+
block_asm.restore_reg(Reg::from(op0 as u8 + 1));
197+
}
198+
195199
block_asm.save_context();
196200

197201
let op0_addr = get_regs!(self.emu, CPU).get_reg(op0) as *const _ as u32;
@@ -246,13 +250,76 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
246250

247251
let mut pre = inst_info.op.mem_transfer_pre();
248252
let decrement = inst_info.op.mem_transfer_decrement();
249-
if decrement {
250-
pre = !pre;
251-
}
252253
let write_back = inst_info.op.mem_transfer_write_back();
253254

254255
let op0 = *inst_info.operands()[0].as_reg_no_shift().unwrap();
255256

257+
let is_valid = !rlist.is_empty() && (!write_back || !rlist.is_reserved(op0));
258+
259+
let slow_read_label = block_asm.new_label();
260+
let continue_label = block_asm.new_label();
261+
262+
if is_valid && !inst_info.op.mem_is_write() && !inst_info.op.mem_transfer_user() && rlist.len() < RegReserve::gp().len() - 2 {
263+
let mut gp_regs = rlist.get_gp_regs();
264+
let mut free_gp_regs = if gp_regs.is_empty() {
265+
RegReserve::gp()
266+
} else {
267+
let highest_gp_reg = gp_regs.get_highest_reg();
268+
RegReserve::from(!((1 << (highest_gp_reg as u8 + 1)) - 1)).get_gp_regs()
269+
};
270+
let mut non_gp_regs = rlist - gp_regs;
271+
272+
while free_gp_regs.len() < non_gp_regs.len() {
273+
let highest_gp_reg = gp_regs.get_highest_reg();
274+
gp_regs -= highest_gp_reg;
275+
non_gp_regs += highest_gp_reg;
276+
free_gp_regs = if gp_regs.is_empty() {
277+
RegReserve::gp()
278+
} else {
279+
RegReserve::from(!((1 << (gp_regs.get_highest_reg() as u8 + 1)) - 1)).get_gp_regs()
280+
};
281+
}
282+
283+
let mut non_gp_regs_mappings = Vec::with_capacity(non_gp_regs.len());
284+
let mut fixed_regs = RegReserve::new();
285+
while !free_gp_regs.is_empty() && !non_gp_regs.is_empty() {
286+
let fixed_reg = free_gp_regs.pop().unwrap();
287+
fixed_regs += fixed_reg;
288+
non_gp_regs_mappings.push((non_gp_regs.pop().unwrap(), fixed_reg));
289+
}
290+
291+
if non_gp_regs.is_empty() {
292+
block_asm.branch(slow_read_label, Cond::NV);
293+
294+
let base_reg = block_asm.new_reg();
295+
let base_reg_out = block_asm.new_reg();
296+
let mmu = get_mmu!(self.emu, CPU);
297+
let base_ptr = mmu.get_base_tcm_ptr();
298+
block_asm.bic(base_reg, op0, 0xF0000000);
299+
block_asm.add(base_reg, base_reg, base_ptr as u32);
300+
block_asm.guest_transfer_read_multiple(base_reg, base_reg_out, gp_regs, fixed_regs, write_back, pre, !decrement);
301+
302+
for (guest_reg, fixed_reg) in non_gp_regs_mappings {
303+
block_asm.mov(guest_reg, BlockReg::Fixed(fixed_reg));
304+
}
305+
306+
if write_back {
307+
block_asm.sub(base_reg_out, base_reg_out, base_ptr as u32);
308+
block_asm.mov(op0, (op0.into(), ShiftType::Lsr, BlockOperand::from(0xF0000000u32.trailing_zeros())));
309+
block_asm.bfi(base_reg_out, op0, 0xF0000000u32.trailing_zeros() as u8, 0xF0000000u32.leading_ones() as u8);
310+
block_asm.mov(op0, base_reg_out);
311+
}
312+
313+
block_asm.branch_fallthrough(continue_label, Cond::AL);
314+
315+
block_asm.free_reg(base_reg_out);
316+
block_asm.free_reg(base_reg);
317+
}
318+
}
319+
320+
if decrement {
321+
pre = !pre;
322+
}
256323
let func_addr: *const () = match (inst_info.op.mem_is_write(), inst_info.op.mem_transfer_user(), pre, write_back, decrement) {
257324
(false, false, false, false, false) => inst_mem_handler_multiple::<CPU, THUMB, false, false, false, false, false> as _,
258325
(true, false, false, false, false) => inst_mem_handler_multiple::<CPU, THUMB, true, false, false, false, false> as _,
@@ -288,6 +355,7 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
288355
(true, true, true, true, true) => inst_mem_handler_multiple::<CPU, THUMB, true, true, true, true, true> as _,
289356
};
290357

358+
block_asm.label(slow_read_label);
291359
block_asm.save_context();
292360
block_asm.call3(
293361
func_addr,
@@ -306,8 +374,9 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
306374
for reg in restore_regs {
307375
block_asm.restore_reg(reg);
308376
}
309-
310377
block_asm.restore_reg(Reg::CPSR);
378+
379+
block_asm.label(continue_label);
311380
}
312381

313382
pub fn emit_swp(&mut self, block_asm: &mut BlockAsm) {

src/jit/inst_mem_handler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ mod handler {
7474
let rlist = RegReserve::from(rlist as u32);
7575
let op0 = Reg::from(op0);
7676

77-
if unlikely(rlist.len() == 0) {
77+
if unlikely(rlist.is_empty()) {
7878
if WRITE {
7979
*get_regs_mut!(emu, CPU).get_reg_mut(op0) -= 0x40;
8080
} else {

0 commit comments

Comments
 (0)