Skip to content

Commit 87d7afc

Browse files
committed
Relocate guest regs as often as possible
1 parent c65b291 commit 87d7afc

File tree

5 files changed

+86
-38
lines changed

5 files changed

+86
-38
lines changed

src/jit/assembler/block_reg_allocator.rs

Lines changed: 74 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,13 @@
1+
use crate::jit::assembler::block_asm::BLOCK_LOG;
12
use crate::jit::assembler::block_inst::{BlockAluOp, BlockAluSetCond, BlockInst, BlockInstKind, BlockTransferOp};
23
use crate::jit::assembler::block_reg_set::BlockRegSet;
34
use crate::jit::assembler::{BlockReg, ANY_REG_LIMIT};
45
use crate::jit::reg::{reg_reserve, Reg, RegReserve};
56
use crate::jit::MemoryAmount;
6-
use crate::utils::{HeapMem, NoHashMap, NoHashSet};
7+
use crate::utils::{HeapMem, NoHashMap};
8+
use std::hint::unreachable_unchecked;
9+
10+
const DEBUG: bool = true;
711

812
pub const ALLOCATION_REGS: RegReserve = reg_reserve!(Reg::R4, Reg::R5, Reg::R6, Reg::R7, Reg::R8, Reg::R9, Reg::R10, Reg::R11);
913
const SCRATCH_REGS: RegReserve = reg_reserve!(Reg::R0, Reg::R1, Reg::R2, Reg::R3, Reg::R12);
@@ -12,7 +16,7 @@ pub struct BlockRegAllocator {
1216
pub global_mapping: NoHashMap<u16, Reg>,
1317
stored_mapping: HeapMem<Reg, { ANY_REG_LIMIT as usize }>, // mappings to real registers
1418
stored_mapping_reverse: [Option<u16>; Reg::SP as usize],
15-
spilled: NoHashSet<u16>, // regs that are spilled
19+
spilled: BlockRegSet,
1620
pub dirty_regs: RegReserve,
1721
pub pre_allocate_insts: Vec<BlockInst>,
1822
}
@@ -23,7 +27,7 @@ impl BlockRegAllocator {
2327
global_mapping: NoHashMap::default(),
2428
stored_mapping: HeapMem::new(),
2529
stored_mapping_reverse: [None; Reg::SP as usize],
26-
spilled: NoHashSet::default(),
30+
spilled: BlockRegSet::new(),
2731
dirty_regs: RegReserve::new(),
2832
pre_allocate_insts: Vec::new(),
2933
}
@@ -36,9 +40,7 @@ impl BlockRegAllocator {
3640
for any_input_reg in input_regs.iter_any() {
3741
if let Some(&global_mapping) = self.global_mapping.get(&any_input_reg) {
3842
match global_mapping {
39-
Reg::None => {
40-
self.spilled.insert(any_input_reg);
41-
}
43+
Reg::None => self.spilled += BlockReg::Any(any_input_reg),
4244
_ => self.set_stored_mapping(any_input_reg, global_mapping),
4345
}
4446
}
@@ -140,7 +142,7 @@ impl BlockRegAllocator {
140142

141143
if greatest_distance != 0 {
142144
let reg = self.stored_mapping[greatest_distance_reg as usize];
143-
self.spilled.insert(greatest_distance_reg);
145+
self.spilled += BlockReg::Any(greatest_distance_reg);
144146
self.gen_pre_handle_spilled_inst(greatest_distance_reg, reg, BlockTransferOp::Write);
145147
self.swap_stored_mapping(any_reg, greatest_distance_reg);
146148
return Some(reg);
@@ -178,7 +180,7 @@ impl BlockRegAllocator {
178180
&& !live_ranges_until_expiration.contains(BlockReg::Any(mapped_reg))
179181
&& !live_ranges_until_expiration.contains(BlockReg::Fixed(reg))
180182
{
181-
self.spilled.insert(mapped_reg);
183+
self.spilled += BlockReg::Any(mapped_reg);
182184
self.gen_pre_handle_spilled_inst(mapped_reg, reg, BlockTransferOp::Write);
183185
self.swap_stored_mapping(any_reg, mapped_reg);
184186
return reg;
@@ -190,7 +192,7 @@ impl BlockRegAllocator {
190192
return reg;
191193
}
192194

193-
todo!()
195+
unsafe { unreachable_unchecked() }
194196
}
195197

196198
fn allocate_reg(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Reg {
@@ -202,18 +204,19 @@ impl BlockRegAllocator {
202204
return reg;
203205
}
204206

205-
todo!()
207+
unsafe { unreachable_unchecked() }
206208
}
207209

208210
fn get_input_reg(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Reg {
209211
match self.stored_mapping[any_reg as usize] {
210212
Reg::None => {
211-
if self.spilled.contains(&any_reg) {
213+
if self.spilled.contains(BlockReg::Any(any_reg)) {
212214
let reg = if live_ranges.last().unwrap().contains(BlockReg::Any(any_reg)) {
213215
self.allocate_reg(any_reg, live_ranges, used_regs)
214216
} else {
215217
self.allocate_local(any_reg, live_ranges, used_regs)
216218
};
219+
self.spilled -= BlockReg::Any(any_reg);
217220
self.gen_pre_handle_spilled_inst(any_reg, reg, BlockTransferOp::Read);
218221
reg
219222
} else {
@@ -228,7 +231,7 @@ impl BlockRegAllocator {
228231
if let Some(any_reg) = self.stored_mapping_reverse[fixed_reg as usize] {
229232
self.remove_stored_mapping(any_reg);
230233
if live_ranges[1].contains(BlockReg::Any(any_reg)) {
231-
self.spilled.insert(any_reg);
234+
self.spilled += BlockReg::Any(any_reg);
232235
self.gen_pre_handle_spilled_inst(any_reg, fixed_reg, BlockTransferOp::Write);
233236
}
234237
}
@@ -237,6 +240,7 @@ impl BlockRegAllocator {
237240
fn get_output_reg(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Reg {
238241
match self.stored_mapping[any_reg as usize] {
239242
Reg::None => {
243+
self.spilled -= BlockReg::Any(any_reg);
240244
if live_ranges.last().unwrap().contains(BlockReg::Any(any_reg)) {
241245
self.allocate_reg(any_reg, live_ranges, used_regs)
242246
} else {
@@ -247,14 +251,66 @@ impl BlockRegAllocator {
247251
}
248252
}
249253

254+
/// Re-homes guest registers so that each eligible register in `guest_regs`
/// ends up with a stored mapping onto the host register of the same name.
///
/// A guest register is eligible when its entry in `stored_mapping` is not
/// already itself, and it is either outside `SCRATCH_REGS` or not present as
/// a fixed register in `live_ranges[1]`.
///
/// The work is done in four passes:
/// 1. collect the eligible registers;
/// 2. for every eligible register whose slot in `stored_mapping_reverse` is
///    occupied, mark the occupant as spilled, generate a spill `Write` for
///    it and remove its mapping;
/// 3. for eligible registers that currently have a mapping (not
///    `Reg::None`), call `gen_pre_move_reg` when `is_input` is set, then
///    remap the register onto itself and drop it from the pending set;
/// 4. for the registers still pending, when `is_input` is set clear the
///    spilled flag and generate a spill `Read`, then map the register onto
///    itself.
///
/// `is_input` selects whether value-transferring instructions (move / spill
/// read) are generated; when it is false only the mapping bookkeeping
/// changes.
///
/// NOTE(review): `live_ranges[1]` is indexed unconditionally — presumably
/// callers always pass at least two entries; confirm against the call sites.
fn relocate_guest_regs(&mut self, guest_regs: RegReserve, live_ranges: &[BlockRegSet], is_input: bool) {
255+
// Pass 1: gather the guest regs that are not yet mapped onto themselves.
let mut relocatable_regs = RegReserve::new();
256+
for guest_reg in guest_regs {
257+
if self.stored_mapping[guest_reg as usize] != guest_reg
258+
// Check if reg is used as a fixed input for something else
259+
&& (!SCRATCH_REGS.is_reserved(guest_reg) || !live_ranges[1].contains(BlockReg::Fixed(guest_reg)))
260+
{
261+
relocatable_regs += guest_reg;
262+
}
263+
}
264+
265+
// Pass 2: evict whichever register currently occupies each target host reg.
for guest_reg in relocatable_regs {
266+
if let Some(currently_used_by) = self.stored_mapping_reverse[guest_reg as usize] {
267+
// The spill instruction is generated before the mapping is removed.
self.spilled += BlockReg::Any(currently_used_by);
268+
self.gen_pre_handle_spilled_inst(currently_used_by, guest_reg, BlockTransferOp::Write);
269+
self.remove_stored_mapping(currently_used_by);
270+
}
271+
}
272+
273+
// Pass 3: guest regs that are mapped somewhere else get remapped onto themselves.
// NOTE(review): `relocatable_regs` is modified inside this loop; presumably
// `RegReserve` is `Copy` and the loop walks a snapshot — confirm.
for guest_reg in relocatable_regs {
274+
let reg_mapped = self.stored_mapping[guest_reg as usize];
275+
if reg_mapped != Reg::None {
276+
if is_input {
277+
// presumably emits a move from `reg_mapped` into `guest_reg` — verify argument order
self.gen_pre_move_reg(guest_reg, reg_mapped);
278+
}
279+
self.remove_stored_mapping(guest_reg as u16);
280+
self.set_stored_mapping(guest_reg as u16, guest_reg);
281+
// Handled here, so pass 4 must not see this register.
relocatable_regs -= guest_reg;
282+
}
283+
}
284+
285+
// Pass 4: the remaining guest regs had no mapping at all.
for guest_reg in relocatable_regs {
286+
if is_input {
287+
// An unmapped input is expected to have been spilled earlier (checked in debug builds only).
debug_assert!(self.spilled.contains(BlockReg::Any(guest_reg as u16)));
288+
self.spilled -= BlockReg::Any(guest_reg as u16);
289+
self.gen_pre_handle_spilled_inst(guest_reg as u16, guest_reg, BlockTransferOp::Read);
290+
}
291+
self.set_stored_mapping(guest_reg as u16, guest_reg);
292+
}
293+
}
294+
250295
pub fn inst_allocate(&mut self, inst: &mut BlockInst, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) {
251296
self.pre_allocate_insts.clear();
252297

298+
if DEBUG && unsafe { BLOCK_LOG } {
299+
println!("allocate reg for {inst:?}");
300+
}
301+
253302
let (inputs, outputs) = inst.get_io();
254303
if inputs.is_empty() && outputs.is_empty() {
255304
return;
256305
}
257306

307+
if DEBUG && unsafe { BLOCK_LOG } {
308+
println!("inputs: {inputs:?}, outputs: {outputs:?}");
309+
}
310+
311+
self.relocate_guest_regs(inputs.get_guests().get_gp_regs(), live_ranges, true);
312+
self.relocate_guest_regs(outputs.get_guests().get_gp_regs(), live_ranges, false);
313+
258314
for any_input_reg in inputs.iter_any() {
259315
let reg = self.get_input_reg(any_input_reg, live_ranges, used_regs);
260316
inst.replace_input_regs(BlockReg::Any(any_input_reg), BlockReg::Fixed(reg));
@@ -276,18 +332,16 @@ impl BlockRegAllocator {
276332
self.pre_allocate_insts.clear();
277333

278334
for output_reg in output_regs.iter_any() {
279-
match self.global_mapping.get(&output_reg).unwrap() {
335+
match *self.global_mapping.get(&output_reg).unwrap() {
280336
Reg::None => {
281337
let stored_mapping = self.stored_mapping[output_reg as usize];
282338
if stored_mapping != Reg::None {
283339
self.remove_stored_mapping(output_reg);
284-
self.spilled.insert(output_reg);
340+
self.spilled += BlockReg::Any(output_reg);
285341
self.gen_pre_handle_spilled_inst(output_reg, stored_mapping, BlockTransferOp::Write);
286342
}
287343
}
288344
desired_reg_mapping => {
289-
let desired_reg_mapping = *desired_reg_mapping;
290-
291345
let stored_mapping = self.stored_mapping[output_reg as usize];
292346
if desired_reg_mapping == stored_mapping {
293347
// Already at correct register, skip
@@ -302,7 +356,7 @@ impl BlockRegAllocator {
302356
Reg::None => {
303357
// other any reg is part of predetermined spilled
304358
self.remove_stored_mapping(currently_used_by);
305-
self.spilled.insert(currently_used_by);
359+
self.spilled += BlockReg::Any(currently_used_by);
306360
self.gen_pre_handle_spilled_inst(currently_used_by, desired_reg_mapping, BlockTransferOp::Write);
307361
}
308362
_ => {
@@ -324,7 +378,7 @@ impl BlockRegAllocator {
324378
if !moved {
325379
// no unused any reg found, just spill the any reg using the desired reg
326380
self.remove_stored_mapping(currently_used_by);
327-
self.spilled.insert(currently_used_by);
381+
self.spilled += BlockReg::Any(currently_used_by);
328382
self.gen_pre_handle_spilled_inst(currently_used_by, desired_reg_mapping, BlockTransferOp::Write);
329383
}
330384
}
@@ -337,8 +391,8 @@ impl BlockRegAllocator {
337391
if stored_mapping != Reg::None {
338392
self.remove_stored_mapping(output_reg);
339393
self.gen_pre_move_reg(desired_reg_mapping, stored_mapping);
340-
} else if self.spilled.contains(&output_reg) {
341-
self.spilled.remove(&output_reg);
394+
} else if self.spilled.contains(BlockReg::Any(output_reg)) {
395+
self.spilled -= BlockReg::Any(output_reg);
342396
self.gen_pre_handle_spilled_inst(output_reg, desired_reg_mapping, BlockTransferOp::Read);
343397
} else {
344398
panic!("required output reg must already have a value");

src/jit/emitter/emit_branch.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -138,10 +138,9 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
138138
let inst_info = self.jit_buf.current_inst();
139139
let target_pc_reg = *inst_info.operands()[0].as_reg_no_shift().unwrap();
140140

141-
block_asm.mov(Reg::PC, target_pc_reg);
142-
block_asm.save_context();
143-
144141
if target_pc_reg == Reg::LR {
142+
block_asm.mov(Reg::PC, target_pc_reg);
143+
block_asm.save_context();
145144
self.emit_branch_return_stack_common(block_asm, target_pc_reg.into());
146145
} else {
147146
self.emit_branch_reg_common(block_asm, target_pc_reg.into(), false);

src/jit/emitter/thumb/emit_branch_thumb.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,10 +67,9 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
6767
let inst_info = self.jit_buf.current_inst();
6868
let target_pc_reg = *inst_info.operands()[0].as_reg_no_shift().unwrap();
6969

70-
block_asm.mov(Reg::PC, target_pc_reg);
71-
block_asm.save_context();
72-
7370
if target_pc_reg == Reg::LR {
71+
block_asm.mov(Reg::PC, target_pc_reg);
72+
block_asm.save_context();
7473
self.emit_branch_return_stack_common(block_asm, target_pc_reg.into());
7574
} else {
7675
self.emit_branch_reg_common(block_asm, target_pc_reg.into(), false);

src/jit/emitter/thumb/emit_thumb.rs

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,11 +79,7 @@ impl<'a, const CPU: CpuType> JitAsm<'a, CPU> {
7979

8080
// R9 can be used as a substitution for SP for branch prediction
8181
if (op == Op::MovHT && self.jit_buf.current_inst().src_regs.is_reserved(Reg::LR))
82-
|| (op.is_multiple_mem_transfer()
83-
&& match *self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap() {
84-
Reg::R9 | Reg::SP => true,
85-
_ => false,
86-
})
82+
|| (op.is_multiple_mem_transfer() && matches!(*self.jit_buf.current_inst().operands()[0].as_reg_no_shift().unwrap(), Reg::R9 | Reg::SP))
8783
|| (op.is_single_mem_transfer() && (self.jit_buf.current_inst().src_regs.is_reserved(Reg::R9) || self.jit_buf.current_inst().src_regs.is_reserved(Reg::SP)))
8884
{
8985
let guest_pc_reg = block_asm.new_reg();

src/jit/jit_asm.rs

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
186186
}
187187

188188
let jit_entry = {
189-
// unsafe { BLOCK_LOG = guest_pc == 0x3800f7c };
189+
// unsafe { BLOCK_LOG = guest_pc == 0x20026b4 };
190190

191191
let mut block_asm = asm.new_block_asm(false);
192192

@@ -204,7 +204,7 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
204204
asm.jit_buf.current_pc = guest_pc + (i << if THUMB { 1 } else { 2 }) as u32;
205205
debug_println!("{CPU:?} emitting {:?} at pc: {:x}", asm.jit_buf.current_inst(), asm.jit_buf.current_pc);
206206

207-
// if asm.jit_buf.current_pc == 0x2000950 {
207+
// if asm.jit_buf.current_pc == 0x20026e4 {
208208
// block_asm.bkpt(1);
209209
// }
210210

@@ -214,11 +214,11 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
214214
asm.emit(&mut block_asm);
215215
}
216216

217-
if DEBUG_LOG {
218-
block_asm.save_context();
219-
block_asm.call2(debug_after_exec_op::<CPU> as *const (), asm.jit_buf.current_pc, asm.jit_buf.current_inst().opcode);
220-
block_asm.restore_reg(Reg::CPSR);
221-
}
217+
// if DEBUG_LOG {
218+
// block_asm.save_context();
219+
// block_asm.call2(debug_after_exec_op::<CPU> as *const (), asm.jit_buf.current_pc, asm.jit_buf.current_inst().opcode);
220+
// block_asm.restore_reg(Reg::CPSR);
221+
// }
222222
}
223223

224224
let opcodes_len = block_asm.emit_opcodes(guest_pc, THUMB);

0 commit comments

Comments
 (0)