Skip to content

Commit 9d36644

Browse files
committed
Squash guest reg loads and stores into multiple transfers
1 parent d60f739 commit 9d36644

File tree

6 files changed

+166
-62
lines changed

6 files changed

+166
-62
lines changed

src/jit/assembler/basic_block.rs

Lines changed: 113 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -242,11 +242,8 @@ impl BasicBlock {
242242
}
243243

244244
pub fn remove_dead_code(&mut self, asm: &mut BlockAsm) {
245-
let mut current_node = self.insts_link.root;
246-
let mut i = 0;
247-
while !current_node.is_null() {
248-
let inst_i = BlockInstList::deref(current_node).value;
249-
let inst = &mut asm.buf.insts[inst_i];
245+
for (i, entry) in self.insts_link.iter().enumerate() {
246+
let inst = &mut asm.buf.insts[entry.value];
250247
if let BlockInstKind::RestoreReg { guest_reg, .. } = &inst.kind {
251248
if *guest_reg != Reg::CPSR {
252249
let (_, outputs) = inst.get_io();
@@ -255,9 +252,119 @@ impl BasicBlock {
255252
}
256253
}
257254
}
255+
}
256+
}
257+
258+
fn flush_reg_io_consolidation(&mut self, asm: &mut BlockAsm, from_reg: Reg, to_reg: Reg, save: bool, start_i: usize, end_i: usize, end_node: *mut BlockInstListEntry) {
259+
let mut current_node = end_node;
260+
let mut live_range_accumulated = BlockRegSet::new();
261+
for i in start_i..=end_i {
262+
let entry = BlockInstList::deref(current_node);
263+
let inst = &mut asm.buf.insts[entry.value];
264+
inst.skip = true;
265+
current_node = entry.previous;
266+
live_range_accumulated += self.regs_live_ranges[i];
267+
}
268+
269+
for i in start_i..=end_i {
270+
self.regs_live_ranges[i] = live_range_accumulated;
271+
}
272+
273+
let end_entry = BlockInstList::deref(end_node);
274+
275+
let mut thread_regs_addr_reg = asm.thread_regs_addr_reg;
276+
if from_reg as u8 > 0 {
277+
thread_regs_addr_reg = asm.tmp_operand_imm_reg;
278+
let previous_entry = BlockInstList::deref(end_entry.previous);
279+
let previous_inst = &mut asm.buf.insts[previous_entry.value];
280+
*previous_inst = BlockInstKind::Alu3 {
281+
op: BlockAluOp::Add,
282+
operands: [thread_regs_addr_reg.into(), asm.thread_regs_addr_reg.into(), (from_reg as u32 * 4).into()],
283+
set_cond: BlockAluSetCond::None,
284+
thumb_pc_aligned: false,
285+
}
286+
.into();
287+
self.used_regs[end_i - 1] += thread_regs_addr_reg;
288+
self.used_regs[end_i] += thread_regs_addr_reg;
289+
self.regs_live_ranges[end_i] += thread_regs_addr_reg;
290+
}
291+
292+
let end_inst = &mut asm.buf.insts[end_entry.value];
293+
let op = if save { BlockTransferOp::Write } else { BlockTransferOp::Read };
294+
295+
let mut guest_regs = RegReserve::new();
296+
for reg in from_reg as u8..=to_reg as u8 {
297+
guest_regs += Reg::from(reg);
298+
}
299+
*end_inst = BlockInstKind::GuestTransferMultiple {
300+
op,
301+
addr_reg: thread_regs_addr_reg,
302+
addr_out_reg: thread_regs_addr_reg,
303+
gp_regs: guest_regs,
304+
fixed_regs: RegReserve::new(),
305+
write_back: false,
306+
pre: false,
307+
add_to_base: true,
308+
}
309+
.into();
310+
self.used_regs[end_i].add_guests(guest_regs);
311+
}
312+
313+
pub fn consolidate_reg_io(&mut self, asm: &mut BlockAsm) {
314+
let mut count = 0;
315+
let mut target_reg = Reg::None;
316+
let mut target_save = false;
317+
let mut last_reg = Reg::None;
318+
let mut was_save = None;
319+
let mut start_i = 0;
320+
let mut i = 0;
321+
322+
let mut current_node = self.insts_link.root;
323+
while !current_node.is_null() {
324+
let entry = BlockInstList::deref(current_node);
325+
let inst = &asm.buf.insts[entry.value];
326+
if !inst.skip {
327+
let mut flush = true;
328+
329+
match &inst.kind {
330+
BlockInstKind::SaveReg { guest_reg, .. } => {
331+
if was_save == Some(true) && *guest_reg <= Reg::R12 && last_reg as u8 + 1 == *guest_reg as u8 {
332+
count += 1;
333+
flush = false;
334+
target_reg = *guest_reg;
335+
target_save = true;
336+
}
337+
last_reg = *guest_reg;
338+
was_save = Some(true);
339+
}
340+
BlockInstKind::RestoreReg { guest_reg, .. } => {
341+
if was_save == Some(false) && *guest_reg <= Reg::R12 && last_reg as u8 + 1 == *guest_reg as u8 {
342+
count += 1;
343+
flush = false;
344+
target_reg = *guest_reg;
345+
target_save = false;
346+
}
347+
last_reg = *guest_reg;
348+
was_save = Some(false);
349+
}
350+
_ => {
351+
last_reg = Reg::None;
352+
was_save = None;
353+
}
354+
}
355+
356+
if flush && count > 0 {
357+
self.flush_reg_io_consolidation(asm, Reg::from(target_reg as u8 - count), target_reg, target_save, start_i, i - 1, entry.previous);
358+
count = 0;
359+
}
360+
361+
if count == 0 {
362+
start_i = i;
363+
}
364+
}
258365

366+
current_node = entry.next;
259367
i += 1;
260-
current_node = BlockInstList::deref(current_node).next;
261368
}
262369
}
263370

src/jit/assembler/block_asm.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ pub struct BlockAsm<'a> {
5252

5353
pub thread_regs_addr_reg: BlockReg,
5454
pub tmp_guest_cpsr_reg: BlockReg,
55-
tmp_operand_imm_reg: BlockReg,
55+
pub tmp_operand_imm_reg: BlockReg,
5656
tmp_shift_imm_reg: BlockReg,
5757
tmp_func_call_reg: BlockReg,
5858

@@ -919,6 +919,7 @@ impl<'a> BlockAsm<'a> {
919919
}
920920

921921
basic_block.remove_dead_code(self);
922+
basic_block.consolidate_reg_io(self);
922923
}
923924

924925
(basic_blocks, reachable_blocks)

src/jit/assembler/block_inst.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -952,7 +952,7 @@ impl Debug for BlockInstKind {
952952
let add_to_base = if *add_to_base { "+" } else { "-" };
953953
write!(
954954
f,
955-
"{op:?}M {addr_reg:?} -> {addr_out_reg:?} gp regs: {gp_regs:?}, fixed regs: {fixed_regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base"
955+
"Guest{op:?}M {addr_reg:?} -> {addr_out_reg:?} gp regs: {gp_regs:?}, fixed regs: {fixed_regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base"
956956
)
957957
}
958958
BlockInstKind::SystemReg { op, operand } => write!(f, "{op:?} {operand:?}"),

src/jit/assembler/block_reg_allocator.rs

Lines changed: 45 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -121,50 +121,46 @@ impl BlockRegAllocator {
121121
None
122122
}
123123

124-
fn allocate_and_spill(&mut self, any_reg: u16, used_regs: &[BlockRegSet], allowed_regs: RegReserve) -> Option<Reg> {
125-
let mut greatest_distance = 0;
126-
let mut greatest_distance_reg = 0;
124+
fn allocate_and_spill(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet], allowed_regs: RegReserve) -> Option<Reg> {
127125
for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
126+
let reg = Reg::from(i as u8);
127+
128+
if mapped_reg.is_none() && allowed_regs.is_reserved(reg) && !live_ranges[1].contains(BlockReg::Fixed(reg)) && !used_regs[0].contains(BlockReg::Fixed(reg)) {
129+
self.set_stored_mapping(any_reg, reg);
130+
return Some(reg);
131+
}
132+
}
133+
134+
for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
135+
let reg = Reg::from(i as u8);
136+
128137
if let &Some(mapped_reg) = mapped_reg {
129-
let reg = Reg::from(i as u8);
130-
if allowed_regs.is_reserved(reg) && !used_regs[0].contains(BlockReg::Any(mapped_reg)) {
131-
for i in 1..used_regs.len() {
132-
if used_regs[i].contains(BlockReg::Any(mapped_reg)) || used_regs[i].contains(BlockReg::Fixed(reg)) {
133-
if i > greatest_distance {
134-
greatest_distance = i;
135-
greatest_distance_reg = mapped_reg;
136-
}
137-
break;
138-
}
139-
}
138+
if allowed_regs.is_reserved(reg) && !used_regs[0].contains(BlockReg::Any(mapped_reg)) && !live_ranges[1].contains(BlockReg::Any(mapped_reg)) {
139+
self.swap_stored_mapping(any_reg, mapped_reg);
140+
return Some(reg);
140141
}
141142
}
142143
}
143144

144-
if greatest_distance != 0 {
145-
let reg = self.stored_mapping[greatest_distance_reg as usize];
146-
self.spilled += BlockReg::Any(greatest_distance_reg);
147-
self.gen_pre_handle_spilled_inst(greatest_distance_reg, reg, BlockTransferOp::Write);
148-
self.swap_stored_mapping(any_reg, greatest_distance_reg);
149-
return Some(reg);
145+
for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
146+
let reg = Reg::from(i as u8);
147+
148+
if let &Some(mapped_reg) = mapped_reg {
149+
if allowed_regs.is_reserved(reg) && !used_regs[0].contains(BlockReg::Any(mapped_reg)) {
150+
self.spilled += BlockReg::Any(mapped_reg);
151+
self.gen_pre_handle_spilled_inst(mapped_reg, reg, BlockTransferOp::Write);
152+
self.swap_stored_mapping(any_reg, mapped_reg);
153+
return Some(reg);
154+
}
155+
}
150156
}
151157

152158
None
153159
}
154160

155161
fn allocate_local(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Reg {
156-
let mut live_ranges_until_expiration = BlockRegSet::new();
157-
let mut used_fixed_until_expiration = used_regs[0].get_fixed();
158-
for i in 1..live_ranges.len() {
159-
if !live_ranges[i].contains(BlockReg::Any(any_reg)) {
160-
break;
161-
}
162-
live_ranges_until_expiration += live_ranges[i];
163-
used_fixed_until_expiration += used_regs[i].get_fixed();
164-
}
165-
166162
for reg in SCRATCH_REGS {
167-
if !used_fixed_until_expiration.is_reserved(reg) && self.stored_mapping_reverse[reg as usize].is_none() && !live_ranges[1].contains(BlockReg::Fixed(reg)) {
163+
if self.stored_mapping_reverse[reg as usize].is_none() && !live_ranges[1].contains(BlockReg::Fixed(reg)) {
168164
self.set_stored_mapping(any_reg, reg);
169165
return reg;
170166
}
@@ -174,22 +170,7 @@ impl BlockRegAllocator {
174170
return reg;
175171
}
176172

177-
for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
178-
if let &Some(mapped_reg) = mapped_reg {
179-
let reg = Reg::from(i as u8);
180-
if !used_regs[0].contains(BlockReg::Any(mapped_reg))
181-
&& !live_ranges_until_expiration.contains(BlockReg::Any(mapped_reg))
182-
&& !live_ranges_until_expiration.contains(BlockReg::Fixed(reg))
183-
{
184-
self.spilled += BlockReg::Any(mapped_reg);
185-
self.gen_pre_handle_spilled_inst(mapped_reg, reg, BlockTransferOp::Write);
186-
self.swap_stored_mapping(any_reg, mapped_reg);
187-
return reg;
188-
}
189-
}
190-
}
191-
192-
if let Some(reg) = self.allocate_and_spill(any_reg, used_regs, SCRATCH_REGS + ALLOCATION_REGS) {
173+
if let Some(reg) = self.allocate_and_spill(any_reg, live_ranges, used_regs, SCRATCH_REGS + ALLOCATION_REGS) {
193174
return reg;
194175
}
195176

@@ -201,7 +182,11 @@ impl BlockRegAllocator {
201182
return reg;
202183
}
203184

204-
if let Some(reg) = self.allocate_and_spill(any_reg, used_regs, ALLOCATION_REGS) {
185+
if let Some(reg) = self.allocate_and_spill(any_reg, live_ranges, used_regs, ALLOCATION_REGS) {
186+
return reg;
187+
}
188+
189+
if let Some(reg) = self.allocate_and_spill(any_reg, live_ranges, used_regs, SCRATCH_REGS) {
205190
return reg;
206191
}
207192

@@ -350,11 +335,17 @@ impl BlockRegAllocator {
350335

351336
if DEBUG && unsafe { BLOCK_LOG } {
352337
println!("inputs: {inputs:?}, outputs: {outputs:?}");
338+
println!("used regs {:?}", used_regs[0]);
353339
}
354340

355341
self.relocate_guest_regs(inputs.get_guests().get_gp_regs(), live_ranges, &inputs, used_regs, true);
356342
self.relocate_guest_regs(outputs.get_guests().get_gp_regs(), live_ranges, &inputs, used_regs, false);
357343

344+
if DEBUG && unsafe { BLOCK_LOG } {
345+
println!("pre mapping {:?}", self.stored_mapping_reverse);
346+
println!("pre spilled {:?}", self.spilled);
347+
}
348+
358349
for any_input_reg in inputs.iter_any() {
359350
let reg = self.get_input_reg(any_input_reg, live_ranges, used_regs);
360351
inst.replace_input_regs(BlockReg::Any(any_input_reg), BlockReg::Fixed(reg));
@@ -370,6 +361,11 @@ impl BlockRegAllocator {
370361
inst.replace_output_regs(BlockReg::Any(any_output_reg), BlockReg::Fixed(reg));
371362
self.dirty_regs += reg;
372363
}
364+
365+
if DEBUG && unsafe { BLOCK_LOG } {
366+
println!("after mapping {:?}", self.stored_mapping_reverse);
367+
println!("after spilled {:?}", self.spilled);
368+
}
373369
}
374370

375371
pub fn ensure_global_mappings(&mut self, output_regs: BlockRegSet) {
@@ -439,7 +435,7 @@ impl BlockRegAllocator {
439435
self.spilled -= BlockReg::Any(output_reg);
440436
self.gen_pre_handle_spilled_inst(output_reg, desired_reg_mapping, BlockTransferOp::Read);
441437
} else {
442-
panic!("required output reg must already have a value");
438+
panic!("required output reg {output_reg:?} must already have a value");
443439
}
444440
self.set_stored_mapping(output_reg, desired_reg_mapping);
445441
}

src/jit/emitter/emit_transfer.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
258258
let fast_mem_mark_dirty_label = block_asm.new_label();
259259
let continue_label = block_asm.new_label();
260260

261-
if is_valid && !inst_info.op.mem_is_write() && !inst_info.op.mem_transfer_user() && rlist.len() < RegReserve::gp().len() - 2 {
261+
if is_valid && !inst_info.op.mem_is_write() && !inst_info.op.mem_transfer_user() && rlist.len() < (RegReserve::gp() + Reg::LR).len() - 2 {
262262
let mut gp_regs = rlist.get_gp_regs();
263263
let mut free_gp_regs = if gp_regs.is_empty() {
264264
RegReserve::gp()

src/jit/jit_asm.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -186,12 +186,12 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
186186
}
187187

188188
let jit_entry = {
189-
// unsafe { BLOCK_LOG = guest_pc == 0x2001b04 };
189+
// unsafe { BLOCK_LOG = guest_pc == 0x2097454 };
190190

191191
let mut block_asm = asm.new_block_asm(false);
192192

193193
if DEBUG_LOG {
194-
block_asm.call2(debug_enter_block::<CPU> as *const (), asm as *mut _ as u32, guest_pc | (THUMB as u32));
194+
block_asm.call1(debug_enter_block::<CPU> as *const (), guest_pc | (THUMB as u32));
195195
block_asm.restore_reg(Reg::CPSR);
196196
}
197197

@@ -200,7 +200,7 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
200200
asm.jit_buf.current_pc = guest_pc + (i << if THUMB { 1 } else { 2 }) as u32;
201201
debug_println!("{CPU:?} emitting {:?} at pc: {:x}", asm.jit_buf.current_inst(), asm.jit_buf.current_pc);
202202

203-
// if asm.jit_buf.current_pc == 0x20a7f9c {
203+
// if asm.jit_buf.current_pc == 0x20098ca {
204204
// block_asm.bkpt(1);
205205
// }
206206

@@ -354,6 +354,6 @@ unsafe extern "C" fn debug_after_exec_op<const CPU: CpuType>(pc: u32, opcode: u3
354354
debug_inst_info::<CPU>(get_regs!((*asm).emu, CPU), pc, &format!("\n\t{:?} {:?}", CPU, inst_info));
355355
}
356356

357-
extern "C" fn debug_enter_block<const CPU: CpuType>(asm: *mut JitAsm<CPU>, pc: u32) {
357+
extern "C" fn debug_enter_block<const CPU: CpuType>(pc: u32) {
358358
println!("{CPU:?} execute {pc:x}");
359359
}

0 commit comments

Comments
 (0)