Skip to content

Commit b09695e

Browse files
committed
WIP
1 parent c373fef commit b09695e

File tree

10 files changed

+176
-89
lines changed

10 files changed

+176
-89
lines changed

src/core/hle/power_manager_hle.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ impl PowerManagerHle {
2323

2424
let cmd = (self.data[0] >> 8) - 0x60;
2525
match cmd {
26+
1 => {
27+
Arm7Hle::send_ipc_fifo(0x8, 0x0300E300, 0, emu);
28+
}
2629
3 => {
2730
Arm7Hle::send_ipc_fifo(0x8, 0x0300E300, 0, emu);
2831
}

src/jit/assembler/basic_block.rs

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
use crate::jit::assembler::arm::alu_assembler::AluShiftImm;
12
use crate::jit::assembler::block_asm::{BlockAsm, BLOCK_LOG};
23
use crate::jit::assembler::block_inst::{BlockAluOp, BlockAluSetCond, BlockSystemRegOp, BlockTransferOp};
34
use crate::jit::assembler::block_inst_list::{BlockInstList, BlockInstListEntry};
45
use crate::jit::assembler::block_reg_set::BlockRegSet;
5-
use crate::jit::assembler::{BlockAsmBuf, BlockInstKind};
6+
use crate::jit::assembler::{BlockAsmBuf, BlockInstKind, BlockLabel};
67
use crate::jit::reg::{Reg, RegReserve};
78
use crate::jit::{MemoryAmount, ShiftType};
89
use crate::IS_DEBUG;
@@ -14,6 +15,9 @@ pub struct BasicBlock {
1415
pub block_entry_start: *mut BlockInstListEntry,
1516
pub block_entry_end: *mut BlockInstListEntry,
1617

18+
pub pad_label: Option<BlockLabel>,
19+
pub pad_size: usize,
20+
1721
pub guest_regs_resolved: bool,
1822
pub guest_regs_input_dirty: RegReserve,
1923
pub guest_regs_output_dirty: RegReserve,
@@ -27,6 +31,8 @@ pub struct BasicBlock {
2731
pub insts_link: BlockInstList,
2832

2933
pub start_pc: u32,
34+
35+
pub opcodes: Vec<u32>,
3036
}
3137

3238
impl BasicBlock {
@@ -37,6 +43,9 @@ impl BasicBlock {
3743
block_entry_start,
3844
block_entry_end,
3945

46+
pad_label: None,
47+
pad_size: 0,
48+
4049
guest_regs_resolved: false,
4150
guest_regs_input_dirty: RegReserve::new(),
4251
guest_regs_output_dirty: RegReserve::new(),
@@ -50,6 +59,8 @@ impl BasicBlock {
5059
insts_link: BlockInstList::new(),
5160

5261
start_pc: 0,
62+
63+
opcodes: Vec::new(),
5364
}
5465
}
5566

@@ -113,6 +124,7 @@ impl BasicBlock {
113124
add_inst = false;
114125
}
115126
BlockInstKind::SaveReg { .. } | BlockInstKind::MarkRegDirty { .. } => {}
127+
BlockInstKind::PadBlock(label) => self.pad_label = Some(*label),
116128
_ => {
117129
let (inputs, _) = asm.buf.insts[i].get_io();
118130
for guest_reg in inputs.get_guests() {
@@ -300,36 +312,48 @@ impl BasicBlock {
300312
}
301313
}
302314

303-
pub fn emit_opcodes(&self, asm: &mut BlockAsm, opcodes_offset: usize, used_host_regs: RegReserve) -> Vec<u32> {
304-
let mut opcodes = Vec::new();
315+
pub fn emit_opcodes(&mut self, asm: &mut BlockAsm, opcodes_offset: usize, block_index: usize, used_host_regs: RegReserve) {
316+
if IS_DEBUG && unsafe { BLOCK_LOG } && opcodes_offset != 0 {
317+
self.opcodes.clear();
318+
}
319+
320+
if !self.opcodes.is_empty() {
321+
return;
322+
}
323+
324+
asm.buf.branch_placeholders[block_index].clear();
325+
305326
let mut inst_opcodes = Vec::new();
306327
for entry in self.insts_link.iter() {
307328
let inst = &mut asm.buf.insts[entry.value];
308329
if inst.skip {
309330
continue;
310331
}
311332

312-
if IS_DEBUG && unsafe { BLOCK_LOG } {
333+
if IS_DEBUG && unsafe { BLOCK_LOG } && opcodes_offset != 0 {
313334
match &inst.kind {
314335
BlockInstKind::GuestPc(pc) => {
315-
println!("(0x{:x}, 0x{pc:x}),", opcodes.len() + opcodes_offset);
336+
println!("(0x{:x}, 0x{pc:x}),", self.opcodes.len() + opcodes_offset);
316337
}
317338
BlockInstKind::Label { guest_pc: Some(pc), .. } => {
318-
println!("(0x{:x}, 0x{pc:x}),", opcodes.len() + opcodes_offset);
339+
println!("(0x{:x}, 0x{pc:x}),", self.opcodes.len() + opcodes_offset);
319340
}
320341
_ => {}
321342
}
322343
}
323344

324345
inst_opcodes.clear();
325346
inst.kind
326-
.emit_opcode(&mut inst_opcodes, opcodes.len(), &mut asm.buf.branch_placeholders, opcodes_offset, used_host_regs);
347+
.emit_opcode(&mut inst_opcodes, self.opcodes.len(), &mut asm.buf.branch_placeholders[block_index], used_host_regs);
327348
for opcode in &mut inst_opcodes {
328349
*opcode = (*opcode & !(0xF << 28)) | ((inst.cond as u32) << 28);
329350
}
330-
opcodes.extend(&inst_opcodes);
351+
self.opcodes.extend(&inst_opcodes);
352+
}
353+
354+
for _ in self.opcodes.len()..self.pad_size {
355+
self.opcodes.push(AluShiftImm::mov_al(Reg::R0, Reg::R0));
331356
}
332-
opcodes
333357
}
334358
}
335359

src/jit/assembler/block_asm.rs

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,10 @@ impl<'a> BlockAsm<'a> {
722722
}
723723
}
724724

725+
/// Records a `BlockInstKind::PadBlock` instruction for `label` by passing it to `insert_inst`.
///
/// NOTE(review): elsewhere in this commit the label is resolved to a basic block whose
/// emitted opcode count becomes `pad_size` of the block containing this instruction —
/// confirm against `BasicBlock::emit_opcodes` / `BlockAsm::emit` in the full sources.
pub fn pad_block(&mut self, label: BlockLabel) {
726+
self.insert_inst(BlockInstKind::PadBlock(label));
727+
}
728+
725729
// Convert guest pc with labels into labels
726730
fn resolve_labels(&mut self, label_aliases: &mut NoHashMap<u16, u16>) {
727731
let mut previous_label: Option<(BlockLabel, Option<u32>)> = None;
@@ -965,6 +969,8 @@ impl<'a> BlockAsm<'a> {
965969
basic_block.remove_dead_code(self);
966970
}
967971

972+
self.buf.basic_block_label_mapping = basic_block_label_mapping;
973+
968974
(basic_blocks, reachable_blocks)
969975
}
970976

@@ -1041,45 +1047,57 @@ impl<'a> BlockAsm<'a> {
10411047
}
10421048

10431049
self.buf.opcodes.clear();
1044-
self.buf.block_opcode_offsets.clear();
1045-
self.buf.branch_placeholders.clear();
1050+
self.buf.block_opcode_offsets.resize(basic_blocks.len(), 0);
1051+
self.buf.branch_placeholders.resize(basic_blocks.len(), Vec::new());
10461052

1047-
for (i, basic_block) in basic_blocks.iter().enumerate() {
1053+
for i in 0..basic_blocks.len() {
1054+
if let Some(label) = basic_blocks[i].pad_label {
1055+
let block_to_pad_to = *self.buf.basic_block_label_mapping.get(&label.0).unwrap();
1056+
self.buf.branch_placeholders[block_to_pad_to].clear();
1057+
basic_blocks[block_to_pad_to].emit_opcodes(self, 0, block_to_pad_to, used_host_regs);
1058+
basic_blocks[i].pad_size = basic_blocks[block_to_pad_to].opcodes.len();
1059+
}
1060+
}
1061+
1062+
for (i, basic_block) in basic_blocks.iter_mut().enumerate() {
10481063
let opcodes_len = self.buf.opcodes.len();
1049-
self.buf.block_opcode_offsets.push(opcodes_len);
1064+
self.buf.block_opcode_offsets[i] = opcodes_len;
10501065

10511066
if !reachable_blocks.contains(&i) {
10521067
continue;
10531068
}
10541069

1055-
let opcodes = basic_block.emit_opcodes(self, opcodes_len, used_host_regs);
1056-
self.buf.opcodes.extend(opcodes);
1070+
basic_block.emit_opcodes(self, opcodes_len, i, used_host_regs);
1071+
self.buf.opcodes.extend(&basic_block.opcodes);
10571072
}
10581073

10591074
self.buf.opcodes.len()
10601075
}
10611076

10621077
pub fn finalize(&mut self, jit_mem_offset: usize) -> &Vec<u32> {
1063-
for &branch_placeholder in &self.buf.branch_placeholders {
1064-
let encoding = BranchEncoding::from(self.buf.opcodes[branch_placeholder]);
1065-
if Cond::from(u8::from(encoding.cond())) == Cond::NV {
1066-
self.buf.opcodes[branch_placeholder] = AluShiftImm::mov_al(Reg::R0, Reg::R0);
1067-
continue;
1068-
}
1078+
for (block_index, branch_placeholders) in self.buf.branch_placeholders.iter().enumerate() {
1079+
for branch_placeholder in branch_placeholders {
1080+
let index = self.buf.block_opcode_offsets[block_index] + *branch_placeholder;
1081+
let encoding = BranchEncoding::from(self.buf.opcodes[index]);
1082+
if Cond::from(u8::from(encoding.cond())) == Cond::NV {
1083+
self.buf.opcodes[index] = AluShiftImm::mov_al(Reg::R0, Reg::R0);
1084+
continue;
1085+
}
10691086

1070-
let diff = if encoding.is_call_common() {
1071-
let opcode_index = (jit_mem_offset >> 2) + branch_placeholder;
1072-
let branch_to = u32::from(encoding.index()) >> 2;
1073-
branch_to as i32 - opcode_index as i32
1074-
} else {
1075-
let block_index = u32::from(encoding.index());
1076-
let branch_to = self.buf.block_opcode_offsets[block_index as usize];
1077-
branch_to as i32 - branch_placeholder as i32
1078-
};
1079-
if diff == 1 && !encoding.has_return() {
1080-
self.buf.opcodes[branch_placeholder] = AluShiftImm::mov_al(Reg::R0, Reg::R0);
1081-
} else {
1082-
self.buf.opcodes[branch_placeholder] = if encoding.has_return() { B::bl } else { B::b }(diff - 2, Cond::from(u8::from(encoding.cond())));
1087+
let diff = if encoding.is_call_common() {
1088+
let opcode_index = (jit_mem_offset >> 2) + index;
1089+
let branch_to = u32::from(encoding.index()) >> 2;
1090+
branch_to as i32 - opcode_index as i32
1091+
} else {
1092+
let block_index = u32::from(encoding.index());
1093+
let branch_to = self.buf.block_opcode_offsets[block_index as usize];
1094+
branch_to as i32 - index as i32
1095+
};
1096+
if diff == 1 && !encoding.has_return() {
1097+
self.buf.opcodes[index] = AluShiftImm::mov_al(Reg::R0, Reg::R0);
1098+
} else {
1099+
self.buf.opcodes[index] = if encoding.has_return() { B::bl } else { B::b }(diff - 2, Cond::from(u8::from(encoding.cond())));
1100+
}
10831101
}
10841102
}
10851103

src/jit/assembler/block_inst.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ impl BlockInstKind {
607607
opcodes.push(LdrStrImm::strb_offset_al(host_reg, thread_regs_addr_reg, Reg::CPSR as u16 * 4 + 3));
608608
}
609609

610-
pub fn emit_opcode(&mut self, opcodes: &mut Vec<u32>, opcode_index: usize, branch_placeholders: &mut Vec<usize>, opcodes_offset: usize, used_host_regs: RegReserve) {
610+
pub fn emit_opcode(&mut self, opcodes: &mut Vec<u32>, opcode_index: usize, branch_placeholders: &mut Vec<usize>, used_host_regs: RegReserve) {
611611
let alu_reg = |op: BlockAluOp, op0: BlockReg, op1: BlockReg, op2: BlockReg, shift: BlockShift, set_cond: bool| match shift.value {
612612
BlockOperand::Reg(shift_reg) => AluReg::generic(op as u8, op0.as_fixed(), op1.as_fixed(), op2.as_fixed(), shift.shift_type, shift_reg.as_fixed(), set_cond, Cond::AL),
613613
BlockOperand::Imm(shift_imm) => {
@@ -781,7 +781,7 @@ impl BlockInstKind {
781781
// Encode label
782782
// Branch offset can only be figured out later
783783
opcodes.push(BranchEncoding::new(u26::new(*block_index as u32), false, false, u4::new(Cond::AL as u8)).into());
784-
branch_placeholders.push(opcodes_offset + opcode_index);
784+
branch_placeholders.push(opcode_index);
785785
}
786786

787787
BlockInstKind::SaveContext { .. } => unsafe { unreachable_unchecked() },
@@ -816,7 +816,7 @@ impl BlockInstKind {
816816
// Encode common offset
817817
// Branch offset can only be figured out later
818818
opcodes.push(BranchEncoding::new(u26::new(*mem_offset as u32), *has_return, true, u4::new(Cond::AL as u8)).into());
819-
branch_placeholders.push(opcodes_offset + opcode_index);
819+
branch_placeholders.push(opcode_index);
820820
}
821821
BlockInstKind::Bkpt(id) => opcodes.push(Bkpt::bkpt(*id)),
822822
BlockInstKind::Nop => opcodes.push(AluShiftImm::mov_al(Reg::R0, Reg::R0)),

src/jit/assembler/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,17 +347,19 @@ pub struct BlockLabel(u16);
347347

348348
pub struct BlockAsmBuf {
349349
pub insts: Vec<BlockInst>,
350+
pub basic_block_label_mapping: NoHashMap<u16, usize>,
350351
pub guest_branches_mapping: NoHashMap<u32, BlockLabel>,
351352
pub reg_allocator: BlockRegAllocator,
352353
pub block_opcode_offsets: Vec<usize>,
353354
pub opcodes: Vec<u32>,
354-
pub branch_placeholders: Vec<usize>,
355+
pub branch_placeholders: Vec<Vec<usize>>,
355356
}
356357

357358
impl BlockAsmBuf {
358359
pub fn new() -> Self {
359360
BlockAsmBuf {
360361
insts: Vec::new(),
362+
basic_block_label_mapping: NoHashMap::default(),
361363
guest_branches_mapping: NoHashMap::default(),
362364
reg_allocator: BlockRegAllocator::new(),
363365
block_opcode_offsets: Vec::new(),

src/jit/emitter/emit_transfer.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::core::CpuType;
44
use crate::jit::assembler::block_asm::BlockAsm;
55
use crate::jit::assembler::{BlockOperand, BlockReg};
66
use crate::jit::inst_info::Operand;
7+
use crate::jit::inst_jit_handler::inst_slow_mem_patch;
78
use crate::jit::inst_mem_handler::{inst_mem_handler, inst_mem_handler_multiple, inst_mem_handler_swp};
89
use crate::jit::jit_asm::JitAsm;
910
use crate::jit::op::Op;
@@ -218,10 +219,12 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
218219
let fast_read_next_addr_reg = block_asm.new_reg();
219220
let fast_read_addr_masked_reg = block_asm.new_reg();
220221

222+
let slow_read_patch_label = block_asm.new_label();
221223
let slow_read_label = block_asm.new_label();
222224
let continue_label = block_asm.new_label();
223225

224226
block_asm.branch(slow_read_label, Cond::NV);
227+
block_asm.pad_block(slow_read_label);
225228

226229
let mmu = get_mmu!(self.emu, CPU);
227230
let base_ptr = mmu.get_base_tcm_ptr();
@@ -249,7 +252,12 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
249252
}
250253

251254
block_asm.branch_fallthrough(continue_label, Cond::AL);
255+
block_asm.branch(slow_read_patch_label, Cond::AL);
256+
257+
block_asm.label_unlikely(slow_read_patch_label);
258+
block_asm.call(inst_slow_mem_patch as *const ());
252259
block_asm.branch(slow_read_label, Cond::AL);
260+
253261
block_asm.label_unlikely(slow_read_label);
254262

255263
block_asm.restore_reg(op0);
@@ -322,7 +330,7 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
322330

323331
let cpsr_backup_reg = block_asm.new_reg();
324332

325-
let use_fast_mem = is_valid && !inst_info.op.mem_transfer_user() && rlist.len() < (RegReserve::gp() + Reg::LR).len() - 2;
333+
let use_fast_mem = false && is_valid && !inst_info.op.mem_transfer_user() && rlist.len() < (RegReserve::gp() + Reg::LR).len() - 2;
326334
if use_fast_mem {
327335
let mut gp_regs = rlist.get_gp_regs();
328336
let mut free_gp_regs = if gp_regs.is_empty() {

src/jit/inst_jit_handler.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
use crate::jit::assembler::arm::alu_assembler::AluShiftImm;
2+
use crate::jit::disassembler::lookup_table::lookup_opcode;
3+
use crate::jit::op::Op;
4+
use crate::jit::reg::Reg;
5+
use crate::mmap::flush_icache;
6+
use core::slice;
7+
use std::arch::asm;
8+
9+
/// Patches generated code in place: locates a "slow" opcode run and a "fast" opcode run
/// relative to the caller's return address, overwrites the fast run with a copy of the
/// slow run, fills the remainder of the fast run with NOPs (`mov r0, r0`), and flushes
/// the instruction cache for the rewritten range.
///
/// The regions are discovered purely by scanning raw memory for marker opcodes:
/// 1. forward from `lr + 4` for the first NOP; the slow run starts one word after it,
/// 2. forward from the slow run for the first opcode decoded as `Op::B`; the slow run ends
///    one word before that branch and the fast run ends one word before the branch target,
/// 3. backward from the end of the fast run, past any trailing NOPs, then on to the next
///    NOP, which is taken as the start of the fast run.
///
/// # Safety
///
/// Reads and writes memory at addresses derived from the link register without any
/// validation. NOTE(review): presumably this must only be called from JIT-emitted code
/// laid out by the fast/slow memory-read emitter, with the code memory writable — confirm
/// against the call site in `emit_transfer.rs`.
pub unsafe extern "C" fn inst_slow_mem_patch() {
10+
// NOTE(review): `mut` is not needed here; `lr` is only written once by the asm block.
let mut lr: u32;
11+
// Copy the link register (return address into the calling code) into a Rust variable.
// NOTE(review): this assumes `lr` still holds the return address when this statement
// runs, i.e. that the compiler-generated prologue has not overwritten it — verify.
asm!("mov {}, lr", out(reg) lr);
12+
13+
// Encoding of `mov r0, r0`, used throughout as the NOP / marker opcode.
let nop_opcode = AluShiftImm::mov_al(Reg::R0, Reg::R0);
14+
15+
// Step 1: scan forward from lr + 4 (at most up to lr + 252) for the first NOP word.
// The slow run is taken to begin at the word immediately after that NOP.
let mut slow_mem_start = 0;
16+
for pc_offset in (4..256).step_by(4) {
17+
let ptr = (lr + pc_offset) as *const u32;
18+
let opcode = ptr.read();
19+
if opcode == nop_opcode {
20+
slow_mem_start = ptr as usize + 4;
21+
break;
22+
}
23+
}
24+
// Only checked in debug builds; in release a failed search leaves `slow_mem_start` at 0.
debug_assert_ne!(slow_mem_start, 0);
25+
26+
// Step 2: scan forward from slow_mem_start + 4 for the first opcode that decodes as `Op::B`.
let mut slow_mem_end = slow_mem_start;
27+
let mut fast_mem_end = 0;
28+
for pc_offset in (4..256).step_by(4) {
29+
let ptr = (slow_mem_start + pc_offset) as *const u32;
30+
let opcode = ptr.read();
31+
let (op, func) = lookup_opcode(opcode);
32+
if *op == Op::B {
33+
let inst = func(opcode, *op);
34+
// Temporarily holds the address of the branch itself; adjusted to the preceding word below.
slow_mem_end = ptr as usize;
35+
// Branch target = branch address + decoded immediate + 8.
// NOTE(review): the +8 presumably accounts for the ARM PC read-ahead — confirm that
// the decoded immediate does not already include it.
let relative_pc = *inst.operands()[0].as_imm().unwrap() as i32 + 8;
36+
let target_pc = slow_mem_end as i32 + relative_pc;
37+
// The fast run ends one word before the branch target.
fast_mem_end = target_pc as usize - 4;
38+
// The slow run ends one word before the branch (inclusive end address).
slow_mem_end -= 4;
39+
break;
40+
}
41+
}
42+
debug_assert_ne!(slow_mem_end, slow_mem_start);
43+
debug_assert_ne!(fast_mem_end, 0);
44+
45+
// Step 3: scan backward from fast_mem_end - 4. Words equal to the NOP are skipped until
// the first non-NOP word is seen; after that, the next NOP encountered marks the start
// of the fast run.
let mut fast_mem_start = 0;
46+
let mut found_non_op = false;
47+
for pc_offset in (4..256).step_by(4) {
48+
let ptr = (fast_mem_end - pc_offset) as *const u32;
49+
let opcode = ptr.read();
50+
if found_non_op {
51+
if opcode == nop_opcode {
52+
fast_mem_start = ptr as usize;
53+
break;
54+
}
55+
} else if opcode != nop_opcode {
56+
found_non_op = true;
57+
}
58+
}
59+
debug_assert_ne!(fast_mem_start, 0);
60+
61+
// Diagnostic output of the discovered address ranges.
println!("fast {fast_mem_start:x} - {fast_mem_end:x} slow {slow_mem_start:x} - {slow_mem_end:x}");
62+
63+
// Both end addresses are inclusive, hence the +1 when converting byte spans to word counts.
let slow_mem_size = ((slow_mem_end - slow_mem_start) >> 2) + 1;
64+
let fast_mem_size = ((fast_mem_end - fast_mem_start) >> 2) + 1;
65+
66+
let fast_mem = slice::from_raw_parts_mut(fast_mem_start as *mut u32, fast_mem_size);
67+
let slow_mem = slice::from_raw_parts(slow_mem_start as *const u32, slow_mem_size);
68+
// Overwrite the beginning of the fast run with the slow run. The slice index panics if
// the slow run is longer than the fast run.
// NOTE(review): presumably the `PadBlock` padding guarantees fast_mem_size >= slow_mem_size — verify.
fast_mem[..slow_mem_size].copy_from_slice(slow_mem);
69+
// Fill whatever is left of the fast run with NOPs.
fast_mem[slow_mem_size..].fill(nop_opcode);
70+
71+
// Flush the instruction cache for the rewritten byte range (word count << 2).
flush_icache(fast_mem_start as _, fast_mem_size << 2);
72+
}

0 commit comments

Comments
 (0)