Skip to content

Commit 8a76e02

Browse files
committed
Initial blocklinking
1 parent 97e056b commit 8a76e02

File tree

11 files changed

+267
-95
lines changed

11 files changed

+267
-95
lines changed

src/jit/assembler/block_asm.rs

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ impl<'a> BlockAsm<'a> {
9696

9797
instance.buf.insts.push(BlockInst::Prologue);
9898

99-
// First argument is write_to_host_sp: bool
99+
// First argument is store_host_sp: bool
100100
instance.cmp(BlockReg::Fixed(Reg::R0), 0);
101101
instance.start_cond_block(Cond::NE);
102102
let host_sp_addr_reg = thread_regs_addr_reg;
@@ -160,14 +160,17 @@ impl<'a> BlockAsm<'a> {
160160
BlockLabel(id)
161161
}
162162

163+
alu3!(and, And, None, false);
163164
alu3!(sub, Sub, None, false);
164165
alu3!(add, Add, None, false);
165166
alu3!(bic, Bic, None, false);
166167
alu3!(orr, Orr, None, false);
167168

168169
alu2_op1!(cmp, Cmp, Host, false);
170+
alu2_op1!(tst, Tst, Host, false);
169171

170172
alu2_op0!(mov, Mov, None, false);
173+
alu2_op0!(mvn, Mvn, None, false);
171174

172175
alu3!(ands_guest_thumb_pc_aligned, And, HostGuest, true);
173176
alu3!(eors_guest_thumb_pc_aligned, Eor, HostGuest, true);
@@ -513,7 +516,7 @@ impl<'a> BlockAsm<'a> {
513516

514517
if let BlockInst::Label { label, guest_pc } = self.buf.insts[i] {
515518
if let Some((p_label, p_guest_pc)) = previous_label {
516-
let replace_guest_pc = p_guest_pc.or_else(|| guest_pc);
519+
let replace_guest_pc = p_guest_pc.or(guest_pc);
517520
previous_label = Some((p_label, replace_guest_pc));
518521
let previous_node = BlockInstList::deref(current_node).previous;
519522
let previous_i = BlockInstList::deref(previous_node).value;
@@ -549,11 +552,19 @@ impl<'a> BlockAsm<'a> {
549552
}
550553
}
551554

552-
fn resolve_df_ordering(already_processed: &mut NoHashSet<usize>, basic_blocks: &mut [BasicBlock], block_i: usize, ordering: &mut [usize], ordering_start: &mut usize, ordering_end: &mut usize) {
555+
fn resolve_df_ordering(
556+
already_processed: &mut NoHashSet<usize>,
557+
completed: &mut NoHashSet<usize>,
558+
basic_blocks: &mut [BasicBlock],
559+
block_i: usize,
560+
ordering: &mut [usize],
561+
ordering_start: &mut usize,
562+
ordering_end: &mut usize,
563+
) {
553564
already_processed.insert(block_i);
554565
let mut cycle = false;
555566
for exit_i in &basic_blocks[block_i].exit_blocks {
556-
if already_processed.contains(exit_i) {
567+
if already_processed.contains(exit_i) && !completed.contains(exit_i) {
557568
cycle = true;
558569
break;
559570
}
@@ -567,9 +578,10 @@ impl<'a> BlockAsm<'a> {
567578
}
568579
for exit_i in basic_blocks[block_i].exit_blocks.clone() {
569580
if !already_processed.contains(&exit_i) {
570-
Self::resolve_df_ordering(already_processed, basic_blocks, exit_i, ordering, ordering_start, ordering_end);
581+
Self::resolve_df_ordering(already_processed, completed, basic_blocks, exit_i, ordering, ordering_start, ordering_end);
571582
}
572583
}
584+
completed.insert(block_i);
573585
}
574586

575587
fn assemble_basic_blocks(&mut self, block_start_pc: u32, thumb: bool) -> (Vec<BasicBlock>, Vec<usize>) {
@@ -671,11 +683,9 @@ impl<'a> BlockAsm<'a> {
671683
let mut current_node = basic_block.block_entry_start;
672684
while !current_node.is_null() {
673685
match &self.buf.insts[BlockInstList::deref(current_node).value] {
674-
BlockInst::Label { guest_pc, .. } => {
675-
if let Some(pc) = guest_pc {
676-
basic_block_start_pc = *pc;
677-
break;
678-
}
686+
BlockInst::Label { guest_pc: Some(pc), .. } => {
687+
basic_block_start_pc = *pc;
688+
break;
679689
}
680690
BlockInst::GuestPc(pc) => {
681691
basic_block_start_pc = *pc;
@@ -700,10 +710,19 @@ impl<'a> BlockAsm<'a> {
700710
}
701711

702712
let mut df_already_processed = NoHashSet::default();
713+
let mut df_completed = NoHashSet::default();
703714
let mut df_ordering = vec![0; basic_blocks_len];
704715
let mut df_ordering_start = 0;
705716
let mut df_ordering_end = basic_blocks_len - 1;
706-
Self::resolve_df_ordering(&mut df_already_processed, &mut basic_blocks, 0, &mut df_ordering, &mut df_ordering_start, &mut df_ordering_end);
717+
Self::resolve_df_ordering(
718+
&mut df_already_processed,
719+
&mut df_completed,
720+
&mut basic_blocks,
721+
0,
722+
&mut df_ordering,
723+
&mut df_ordering_start,
724+
&mut df_ordering_end,
725+
);
707726

708727
(basic_blocks, df_ordering)
709728
}
@@ -724,6 +743,7 @@ impl<'a> BlockAsm<'a> {
724743
break;
725744
}
726745
}
746+
assert!(i < end_j);
727747
intervals.insert(reg, (i, end_j));
728748
}
729749
processed_regs += outputs;
@@ -753,6 +773,11 @@ impl<'a> BlockAsm<'a> {
753773

754774
let mut reg_intervals = Self::assemble_intervals(&basic_blocks, &basic_blocks_order);
755775

776+
if unsafe { BLOCK_LOG } {
777+
println!("reg intervals {reg_intervals:?} ");
778+
println!("block ordering {basic_blocks_order:?}");
779+
}
780+
756781
self.buf.reg_allocator.global_mapping.clear();
757782
let mut free_regs = block_reg_allocator::ALLOCATION_REGS;
758783
while !free_regs.is_empty() {
@@ -785,10 +810,10 @@ impl<'a> BlockAsm<'a> {
785810
let mut branch_placeholders = Vec::new();
786811
let mut opcodes_offset = Vec::with_capacity(basic_blocks.len());
787812
for (i, basic_block) in basic_blocks.iter().enumerate() {
813+
opcodes_offset.push(opcodes.len());
788814
if i != 0 && basic_block.enter_blocks.is_empty() {
789815
continue;
790816
}
791-
opcodes_offset.push(opcodes.len());
792817
opcodes.extend(basic_block.emit_opcodes(&mut self, &mut branch_placeholders, opcodes.len()));
793818
}
794819

src/jit/assembler/block_inst.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,9 @@ impl BlockInst {
600600
opcodes.push(inst_info.assemble());
601601
}
602602

603-
BlockInst::Prologue => opcodes.push(LdmStm::generic(Reg::SP, block_reg_allocator::ALLOCATION_REGS + Reg::LR, false, true, false, true, Cond::AL)),
604-
BlockInst::Epilogue => opcodes.push(LdmStm::generic(Reg::SP, block_reg_allocator::ALLOCATION_REGS + Reg::PC, true, true, true, false, Cond::AL)),
603+
// r4-r12,{lr|pc}: we need an even number of registers for 8-byte alignment, in case the compiler decides to use NEON instructions
604+
BlockInst::Prologue => opcodes.push(LdmStm::generic(Reg::SP, block_reg_allocator::ALLOCATION_REGS + Reg::R12 + Reg::LR, false, true, false, true, Cond::AL)),
605+
BlockInst::Epilogue => opcodes.push(LdmStm::generic(Reg::SP, block_reg_allocator::ALLOCATION_REGS + Reg::R12 + Reg::PC, true, true, true, false, Cond::AL)),
605606

606607
BlockInst::Label { .. } | BlockInst::GuestPc(_) => {}
607608
}

src/jit/emitter/emit.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::core::CpuType;
22
use crate::core::CpuType::ARM7;
33
use crate::jit::assembler::block_asm::BlockAsm;
4+
use crate::jit::assembler::{BlockLabel, BlockReg};
45
use crate::jit::inst_threag_regs_handler::{register_restore_spsr, restore_thumb_after_restore_spsr, set_pc_arm_mode};
56
use crate::jit::jit_asm::{JitAsm, JitRuntimeData};
67
use crate::jit::op::Op;
@@ -18,7 +19,8 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
1819
block_asm.start_cond_block(cond);
1920
match op {
2021
Op::B | Op::Bl => self.emit_branch_label(block_asm),
21-
Op::Bx | Op::BlxReg => self.emit_branch_reg(block_asm),
22+
Op::Bx => self.emit_bx(block_asm),
23+
Op::BlxReg => self.emit_blx(block_asm),
2224
Op::Blx => self.emit_blx_label(block_asm),
2325
Op::Mcr | Op::Mrc => self.emit_cp15(block_asm),
2426
Op::MsrRc | Op::MsrIc | Op::MsrRs | Op::MsrIs => self.emit_msr(block_asm),
@@ -99,7 +101,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
99101
self._emit_branch_out_metadata(block_asm, true)
100102
}
101103

102-
pub fn emit_flush_cycles<ContinueFn: Fn(&mut Self, &mut BlockAsm), BreakoutFn: Fn(&mut Self, &mut BlockAsm)>(
104+
pub fn emit_flush_cycles<ContinueFn: Fn(&mut Self, &mut BlockAsm, BlockLabel, BlockReg), BreakoutFn: Fn(&mut Self, &mut BlockAsm)>(
103105
&mut self,
104106
block_asm: &mut BlockAsm,
105107
target_pre_cycle_count_sum: u16,
@@ -155,7 +157,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
155157
false,
156158
MemoryAmount::Half,
157159
);
158-
continue_fn(self, block_asm);
160+
continue_fn(self, block_asm, breakout_label, runtime_data_addr_reg);
159161

160162
block_asm.label(breakout_label);
161163
breakout_fn(self, block_asm);

src/jit/emitter/emit_branch.rs

Lines changed: 102 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
use crate::core::emu::get_jit;
12
use crate::core::CpuType;
23
use crate::core::CpuType::ARM9;
34
use crate::jit::assembler::block_asm::BlockAsm;
5+
use crate::jit::assembler::{BlockOperand, BlockReg};
46
use crate::jit::inst_info::InstInfo;
5-
use crate::jit::jit_asm::JitAsm;
7+
use crate::jit::jit_asm::{JitAsm, JitRuntimeData, BLOCK_LINK_STACK_SIZE};
68
use crate::jit::op::Op;
79
use crate::jit::reg::{reg_reserve, Reg, RegReserve};
8-
use crate::jit::Cond;
10+
use crate::jit::{jit_memory_map, Cond, MemoryAmount, ShiftType};
911

1012
pub enum JitBranchInfo {
1113
Idle,
@@ -80,15 +82,15 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
8082
let branch_info = Self::analyze_branch_label::<THUMB>(&self.jit_buf.insts, self.jit_buf.current_index, cond, self.jit_buf.current_pc, target_pc);
8183

8284
if let JitBranchInfo::Local(target_index) = branch_info {
83-
let target_pre_cycle_count_sum = if target_index == 0 { 0 } else { self.jit_buf.insts_cycle_counts[target_index] };
85+
let target_pre_cycle_count_sum = self.jit_buf.insts_cycle_counts[target_index] - self.jit_buf.insts[target_index].cycle as u16;
8486

8587
let backed_up_cpsr_reg = block_asm.new_reg();
8688
block_asm.mrs_cpsr(backed_up_cpsr_reg);
8789

8890
self.emit_flush_cycles(
8991
block_asm,
9092
target_pre_cycle_count_sum,
91-
|_, block_asm| {
93+
|_, block_asm, _, _| {
9294
block_asm.msr_cpsr(backed_up_cpsr_reg);
9395
block_asm.guest_branch(Cond::AL, target_pc);
9496
},
@@ -113,19 +115,105 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
113115
block_asm.epilogue();
114116
}
115117

116-
pub fn emit_branch_reg(&mut self, block_asm: &mut BlockAsm) {
118+
pub fn emit_bx(&mut self, block_asm: &mut BlockAsm) {
117119
let inst_info = self.jit_buf.current_inst();
118120
let branch_to = *inst_info.operands()[0].as_reg_no_shift().unwrap();
119121

120-
if inst_info.op == Op::BlxReg {
121-
block_asm.mov(Reg::LR, self.jit_buf.current_pc + 4);
122-
}
123122
block_asm.mov(Reg::PC, branch_to);
124123
block_asm.save_context();
125124
self.emit_branch_out_metadata(block_asm);
126125
block_asm.epilogue();
127126
}
128127

128+
pub fn emit_blx(&mut self, block_asm: &mut BlockAsm) {
129+
let inst_info = self.jit_buf.current_inst();
130+
let target_pc_reg = *inst_info.operands()[0].as_reg_no_shift().unwrap();
131+
132+
block_asm.mov(Reg::LR, self.jit_buf.current_pc + 4);
133+
self.emit_branch_reg_common(block_asm, target_pc_reg.into());
134+
}
135+
136+
pub fn emit_branch_reg_common(&mut self, block_asm: &mut BlockAsm, target_pc_reg: BlockReg) {
137+
block_asm.mov(Reg::PC, target_pc_reg);
138+
block_asm.save_context();
139+
140+
self.emit_flush_cycles(
141+
block_asm,
142+
0,
143+
|asm, block_asm, breakout_label, runtime_data_addr_reg| {
144+
let block_link_ptr_reg = block_asm.new_reg();
145+
146+
block_asm.transfer_read(block_link_ptr_reg, runtime_data_addr_reg, JitRuntimeData::get_block_link_ptr_offset() as u32, false, MemoryAmount::Byte);
147+
148+
block_asm.cmp(block_link_ptr_reg, BLOCK_LINK_STACK_SIZE as u32);
149+
block_asm.branch(breakout_label, Cond::EQ);
150+
151+
let block_link_stack_ptr_reg = block_asm.new_reg();
152+
block_asm.add(block_link_stack_ptr_reg, runtime_data_addr_reg, JitRuntimeData::get_block_link_stack_offset() as u32);
153+
block_asm.add(block_link_stack_ptr_reg, block_link_stack_ptr_reg, (block_link_ptr_reg.into(), ShiftType::Lsl, BlockOperand::from(3)));
154+
block_asm.transfer_write(Reg::LR, block_link_stack_ptr_reg, 0, false, MemoryAmount::Word);
155+
156+
let return_pre_cycle_count_sum_reg = block_asm.new_reg();
157+
block_asm.mov(return_pre_cycle_count_sum_reg, asm.jit_buf.insts_cycle_counts[asm.jit_buf.current_index] as u32);
158+
block_asm.transfer_write(return_pre_cycle_count_sum_reg, block_link_stack_ptr_reg, 4, false, MemoryAmount::Half);
159+
160+
block_asm.add(block_link_ptr_reg, block_link_ptr_reg, 1);
161+
block_asm.transfer_write(block_link_ptr_reg, runtime_data_addr_reg, JitRuntimeData::get_block_link_ptr_offset() as u32, false, MemoryAmount::Byte);
162+
163+
block_asm.free_reg(return_pre_cycle_count_sum_reg);
164+
block_asm.free_reg(block_link_stack_ptr_reg);
165+
block_asm.free_reg(block_link_ptr_reg);
166+
167+
let target_addr_reg = block_asm.new_reg();
168+
let pc_mask_reg = block_asm.new_reg();
169+
170+
// Align pc: mask with !1 if bit 0 of the target is set, otherwise with !3
171+
block_asm.mvn(pc_mask_reg, 1);
172+
block_asm.tst(target_pc_reg, 1);
173+
block_asm.start_cond_block(Cond::EQ);
174+
block_asm.mvn(pc_mask_reg, 3);
175+
block_asm.end_cond_block();
176+
177+
block_asm.and(target_addr_reg, target_pc_reg, pc_mask_reg);
178+
179+
let map_ptr = get_jit!(asm.emu).jit_memory_map.get_map_ptr::<CPU>();
180+
181+
let map_ptr_reg = block_asm.new_reg();
182+
let map_index_reg = block_asm.new_reg();
183+
let map_entry_base_ptr_reg = block_asm.new_reg();
184+
185+
block_asm.mov(map_ptr_reg, map_ptr as u32);
186+
block_asm.mov(map_index_reg, (target_addr_reg.into(), ShiftType::Lsr, BlockOperand::from(jit_memory_map::BLOCK_SHIFT as u32 + 1)));
187+
block_asm.transfer_read(
188+
map_entry_base_ptr_reg,
189+
map_ptr_reg,
190+
(map_index_reg.into(), ShiftType::Lsl, BlockOperand::from(2)),
191+
false,
192+
MemoryAmount::Word,
193+
);
194+
let block_size_mask_reg = map_index_reg;
195+
block_asm.mov(block_size_mask_reg, (jit_memory_map::BLOCK_SIZE as u32 - 1) << 2);
196+
block_asm.and(target_addr_reg, block_size_mask_reg, (target_addr_reg.into(), ShiftType::Lsl, BlockOperand::from(1)));
197+
198+
let entry_fn_reg = block_asm.new_reg();
199+
block_asm.transfer_read(entry_fn_reg, map_entry_base_ptr_reg, target_addr_reg, false, MemoryAmount::Word);
200+
201+
block_asm.call1(entry_fn_reg, 0);
202+
203+
block_asm.free_reg(entry_fn_reg);
204+
block_asm.free_reg(map_entry_base_ptr_reg);
205+
block_asm.free_reg(map_index_reg);
206+
block_asm.free_reg(map_ptr_reg);
207+
block_asm.free_reg(pc_mask_reg);
208+
block_asm.free_reg(target_addr_reg);
209+
},
210+
|asm, block_asm| {
211+
asm.emit_branch_out_metadata(block_asm);
212+
block_asm.epilogue();
213+
},
214+
);
215+
}
216+
129217
pub fn emit_blx_label(&mut self, block_asm: &mut BlockAsm) {
130218
if CPU != ARM9 {
131219
return;
@@ -134,10 +222,12 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
134222
let relative_pc = *self.jit_buf.current_inst().operands()[0].as_imm().unwrap() as i32 + 8;
135223
let target_pc = (self.jit_buf.current_pc as i32 + relative_pc) as u32;
136224

225+
let target_pc_reg = block_asm.new_reg();
226+
block_asm.mov(target_pc_reg, target_pc | 1);
227+
137228
block_asm.mov(Reg::LR, self.jit_buf.current_pc + 4);
138-
block_asm.mov(Reg::PC, target_pc | 1);
139-
block_asm.save_context();
140-
self.emit_branch_out_metadata(block_asm);
141-
block_asm.epilogue();
229+
self.emit_branch_reg_common(block_asm, target_pc_reg);
230+
231+
block_asm.free_reg(target_pc_reg);
142232
}
143233
}

src/jit/emitter/thumb/emit_branch_thumb.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
6363
}
6464
block_asm.mov(Reg::PC, target_pc_reg);
6565

66-
block_asm.save_context();
67-
self.emit_branch_out_metadata(block_asm);
68-
block_asm.epilogue();
69-
66+
self.emit_branch_reg_common(block_asm, target_pc_reg);
7067
block_asm.free_reg(target_pc_reg);
7168
}
7269

src/jit/emitter/thumb/emit_thumb.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
5353
Op::BxRegT | Op::BlxRegT => self.emit_bx_thumb(block_asm),
5454

5555
Op::SwiT => self.emit_swi::<true>(block_asm),
56-
Op::UnkThumb => todo!("{op:?}"),
56+
Op::UnkThumb => {}
5757
op if op.is_single_mem_transfer() => {
5858
if op.mem_is_write() {
5959
self.emit_str_thumb(block_asm)

src/jit/inst_mem_handler.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,10 @@ macro_rules! imm_breakout {
161161
$asm.runtime_data.branch_out_total_cycles = $total_cycles;
162162
crate::core::emu::get_regs_mut!($asm.emu, CPU).pc = $pc + if $thumb { 3 } else { 4 };
163163
crate::core::emu::get_mem_mut!($asm.emu).breakout_imm = false;
164+
// r4-r12,pc: we need an even number of registers for 8-byte alignment, in case the compiler decides to use NEON instructions
164165
std::arch::asm!(
165166
"mov sp, {}",
166-
"pop {{r4-r11,pc}}",
167+
"pop {{r4-r12,pc}}",
167168
in(reg) $asm.runtime_data.host_sp
168169
);
169170
std::hint::unreachable_unchecked();
@@ -215,9 +216,10 @@ pub unsafe extern "C" fn inst_mem_handler_multiple<
215216
get_regs_mut!((*asm).emu, CPU).pc &= !1;
216217
}
217218
}
219+
// r4-r12,pc: we need an even number of registers for 8-byte alignment, in case the compiler decides to use NEON instructions
218220
asm!(
219221
"mov sp, {}",
220-
"pop {{r4-r11,pc}}",
222+
"pop {{r4-r12,pc}}",
221223
in(reg) (*asm).runtime_data.host_sp
222224
);
223225
unreachable_unchecked();

0 commit comments

Comments
 (0)