Skip to content

Commit 7a5f622

Browse files
committed
Cover more immediate ROR cases
* Generate fewer relocation instructions
1 parent 132f3b1 commit 7a5f622

File tree

2 files changed

+64
-10
lines changed

2 files changed

+64
-10
lines changed

src/jit/assembler/block_asm.rs

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::bitset::Bitset;
2+
use crate::jit::assembler::arm::alu_assembler::AluShiftImm;
23
use crate::jit::assembler::arm::branch_assembler::B;
34
use crate::jit::assembler::basic_block::BasicBlock;
45
use crate::jit::assembler::block_inst::{BlockAluOp, BlockAluSetCond, BlockInst, BlockSystemRegOp, BlockTransferOp, BranchEncoding, GuestInstInfo};
@@ -214,6 +215,16 @@ impl<'a> BlockAsm<'a> {
214215
if op2.operand.needs_reg_for_imm(0xFF) {
215216
debug_assert_eq!(op2.shift, BlockShift::default());
216217
let imm = op2.operand.as_imm();
218+
219+
let msb_ones = (imm.leading_ones() + 0x1) & !0x1;
220+
if msb_ones != 0 {
221+
let ror_imm = (imm << msb_ones) | (imm >> (32 - msb_ones));
222+
if ror_imm & !0xFF == 0 {
223+
*op2 = (ror_imm, ShiftType::Ror, msb_ones >> 1).into();
224+
return;
225+
}
226+
}
227+
217228
let lsb_zeros = imm.trailing_zeros() & !0x1;
218229
if (imm >> lsb_zeros) & !0xFF == 0 {
219230
*op2 = (imm >> lsb_zeros, ShiftType::Ror, (32 - lsb_zeros) >> 1).into();
@@ -258,30 +269,38 @@ impl<'a> BlockAsm<'a> {
258269
}
259270

260271
pub fn load_u8(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
261-
self.transfer(BlockTransferOp::Read, op0, op1, op2, false, MemoryAmount::Byte)
272+
self.transfer_read(op0, op1, op2, false, MemoryAmount::Byte)
262273
}
263274

264275
pub fn store_u8(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
265-
self.transfer(BlockTransferOp::Write, op0, op1, op2, false, MemoryAmount::Byte)
276+
self.transfer_write(op0, op1, op2, false, MemoryAmount::Byte)
266277
}
267278

268279
pub fn load_u16(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
269-
self.transfer(BlockTransferOp::Read, op0, op1, op2, false, MemoryAmount::Half)
280+
self.transfer_read(op0, op1, op2, false, MemoryAmount::Half)
270281
}
271282

272283
pub fn store_u16(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
273-
self.transfer(BlockTransferOp::Write, op0, op1, op2, false, MemoryAmount::Half)
284+
self.transfer_write(op0, op1, op2, false, MemoryAmount::Half)
274285
}
275286

276287
pub fn load_u32(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
277-
self.transfer(BlockTransferOp::Read, op0, op1, op2, false, MemoryAmount::Word)
288+
self.transfer_read(op0, op1, op2, false, MemoryAmount::Word)
278289
}
279290

280291
pub fn store_u32(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>) {
281-
self.transfer(BlockTransferOp::Write, op0, op1, op2, false, MemoryAmount::Word)
292+
self.transfer_write(op0, op1, op2, false, MemoryAmount::Word)
293+
}
294+
295+
/// Convenience wrapper around `transfer` with the operation fixed to `BlockTransferOp::Read`.
///
/// `op0`, `op1`, `op2`, `signed` and `amount` are forwarded to `transfer` unchanged.
pub fn transfer_read(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>, signed: bool, amount: MemoryAmount) {
296+
self.transfer(BlockTransferOp::Read, op0, op1, op2, signed, amount)
282297
}
283298

284-
fn transfer(&mut self, op: BlockTransferOp, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>, signed: bool, amount: MemoryAmount) {
299+
/// Convenience wrapper around `transfer` with the operation fixed to `BlockTransferOp::Write`.
///
/// `op0`, `op1`, `op2`, `signed` and `amount` are forwarded to `transfer` unchanged.
pub fn transfer_write(&mut self, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>, signed: bool, amount: MemoryAmount) {
300+
self.transfer(BlockTransferOp::Write, op0, op1, op2, signed, amount)
301+
}
302+
303+
pub fn transfer(&mut self, op: BlockTransferOp, op0: impl Into<BlockReg>, op1: impl Into<BlockReg>, op2: impl Into<BlockOperandShift>, signed: bool, amount: MemoryAmount) {
285304
let mut op2 = op2.into();
286305
if op2.operand.needs_reg_for_imm(0xFFF) {
287306
self.mov(self.tmp_operand_imm_reg, op2.operand);
@@ -1003,6 +1022,11 @@ impl<'a> BlockAsm<'a> {
10031022
pub fn finalize(&mut self, jit_mem_offset: usize) -> &Vec<u32> {
10041023
for &branch_placeholder in &self.buf.branch_placeholders {
10051024
let encoding = BranchEncoding::from(self.buf.opcodes[branch_placeholder]);
1025+
if Cond::from(u8::from(encoding.cond())) == Cond::NV {
1026+
self.buf.opcodes[branch_placeholder] = AluShiftImm::mov_al(Reg::R0, Reg::R0);
1027+
continue;
1028+
}
1029+
10061030
let diff = if encoding.is_call_common() {
10071031
let opcode_index = (jit_mem_offset >> 2) + branch_placeholder;
10081032
let branch_to = u32::from(encoding.index()) >> 2;

src/jit/assembler/block_reg_allocator.rs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::bitset::Bitset;
12
use crate::jit::assembler::block_asm::BLOCK_LOG;
23
use crate::jit::assembler::block_inst::{BlockAluOp, BlockAluSetCond, BlockInst, BlockInstKind, BlockTransferOp};
34
use crate::jit::assembler::block_reg_set::BlockRegSet;
@@ -251,7 +252,7 @@ impl BlockRegAllocator {
251252
}
252253
}
253254

254-
fn relocate_guest_regs(&mut self, guest_regs: RegReserve, live_ranges: &[BlockRegSet], is_input: bool) {
255+
fn relocate_guest_regs(&mut self, guest_regs: RegReserve, live_ranges: &[BlockRegSet], inputs: &BlockRegSet, used_regs: &[BlockRegSet], is_input: bool) {
255256
let mut relocatable_regs = RegReserve::new();
256257
for guest_reg in guest_regs {
257258
if self.stored_mapping[guest_reg as usize] != guest_reg
@@ -262,19 +263,23 @@ impl BlockRegAllocator {
262263
}
263264
}
264265

266+
let mut spilled_regs = Vec::new();
265267
for guest_reg in relocatable_regs {
266268
if let Some(currently_used_by) = self.stored_mapping_reverse[guest_reg as usize] {
267269
self.spilled += BlockReg::Any(currently_used_by);
268270
self.gen_pre_handle_spilled_inst(currently_used_by, guest_reg, BlockTransferOp::Write);
269271
self.remove_stored_mapping(currently_used_by);
272+
spilled_regs.push((BlockReg::Any(currently_used_by), guest_reg, self.pre_allocate_insts.len() - 1));
270273
}
271274
}
272275

276+
let mut dirty_regs = RegReserve::new();
273277
for guest_reg in relocatable_regs {
274278
let reg_mapped = self.stored_mapping[guest_reg as usize];
275279
if reg_mapped != Reg::None {
276280
if is_input {
277281
self.gen_pre_move_reg(guest_reg, reg_mapped);
282+
dirty_regs += guest_reg;
278283
}
279284
self.remove_stored_mapping(guest_reg as u16);
280285
self.set_stored_mapping(guest_reg as u16, guest_reg);
@@ -287,9 +292,34 @@ impl BlockRegAllocator {
287292
debug_assert!(self.spilled.contains(BlockReg::Any(guest_reg as u16)));
288293
self.spilled -= BlockReg::Any(guest_reg as u16);
289294
self.gen_pre_handle_spilled_inst(guest_reg as u16, guest_reg, BlockTransferOp::Read);
295+
dirty_regs += guest_reg;
290296
}
291297
self.set_stored_mapping(guest_reg as u16, guest_reg);
292298
}
299+
300+
let mut new_pre_allocate_insts_filter = Bitset::<1>::new();
301+
for (spilled_reg, previous_mapping, pre_allocate_index) in spilled_regs {
302+
if inputs.contains(spilled_reg) && !dirty_regs.is_reserved(previous_mapping) {
303+
let reg = if live_ranges.last().unwrap().contains(spilled_reg) {
304+
self.allocate_reg(spilled_reg.as_any(), live_ranges, used_regs)
305+
} else {
306+
self.allocate_local(spilled_reg.as_any(), live_ranges, used_regs)
307+
};
308+
self.spilled -= spilled_reg;
309+
self.gen_pre_move_reg(reg, previous_mapping);
310+
new_pre_allocate_insts_filter += pre_allocate_index;
311+
}
312+
}
313+
314+
if !new_pre_allocate_insts_filter.is_empty() {
315+
let mut new_pre_allocate_insts = Vec::new();
316+
for (i, inst) in self.pre_allocate_insts.iter().enumerate() {
317+
if !new_pre_allocate_insts_filter.contains(i) {
318+
new_pre_allocate_insts.push(inst.clone());
319+
}
320+
}
321+
self.pre_allocate_insts = new_pre_allocate_insts;
322+
}
293323
}
294324

295325
pub fn inst_allocate(&mut self, inst: &mut BlockInst, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) {
@@ -308,8 +338,8 @@ impl BlockRegAllocator {
308338
println!("inputs: {inputs:?}, outputs: {outputs:?}");
309339
}
310340

311-
self.relocate_guest_regs(inputs.get_guests().get_gp_regs(), live_ranges, true);
312-
self.relocate_guest_regs(outputs.get_guests().get_gp_regs(), live_ranges, false);
341+
self.relocate_guest_regs(inputs.get_guests().get_gp_regs(), live_ranges, &inputs, used_regs, true);
342+
self.relocate_guest_regs(outputs.get_guests().get_gp_regs(), live_ranges, &inputs, used_regs, false);
313343

314344
for any_input_reg in inputs.iter_any() {
315345
let reg = self.get_input_reg(any_input_reg, live_ranges, used_regs);

0 commit comments

Comments
 (0)