 //!
 //! TODO: opportunities for better code generation:
 //!
-//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns; recognize
-//!   and incorporate sign/zero extension on indices. Recognize pre/post-index
-//!   opportunities.
+//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns. Recognize
+//!   pre/post-index opportunities.
 //!
 //! - Floating-point immediates (FIMM instruction).
@@ -21,8 +20,9 @@ use crate::isa::aarch64::AArch64Backend;
 
 use super::lower_inst;
 
-use log::debug;
+use log::{debug, trace};
 use regalloc::{Reg, RegClass, Writable};
+use smallvec::SmallVec;
 
 //============================================================================
 // Result enum types.
@@ -573,105 +573,251 @@ pub(crate) fn alu_inst_immshift(
 // Lowering: addressing mode support. Takes instruction directly, rather
 // than an `InsnInput`, to do more introspection.
 
+/// 32-bit addends that make up an address: an input, and an extension mode on that
+/// input.
+type AddressAddend32List = SmallVec<[(Reg, ExtendOp); 4]>;
+/// 64-bit addends that make up an address: just an input.
+type AddressAddend64List = SmallVec<[Reg; 4]>;
+
+/// Collect all addends that feed into an address computation, with extend-modes
+/// on each. Note that a load/store may have multiple address components (and
+/// the CLIF semantics are that these components are added to form the final
+/// address), but sometimes the CLIF that we receive still has arguments that
+/// refer to `iadd` instructions. We also want to handle uextend/sextend below
+/// the add(s).
+///
+/// We match any 64-bit add (and descend into its inputs), and we match any
+/// 32-to-64-bit sign or zero extension. The two returned addend lists indicate
+/// how each input is to be extended:
+///
+/// - A 64-bit addend is an input that is already 64 bits wide; no extension
+///   is needed.
+/// - A 32-bit addend is an input that is 32 bits wide, paired with the
+///   `ExtendOp` (`SXTW` for a sign-extension, `UXTW` for a zero-extension)
+///   to apply to it.
+///
+/// We do not descend further into the inputs of extensions, because supporting
+/// (e.g.) a 32-bit add that is later extended would require additional masking
+/// of high-order bits, which is too complex. So, in essence, we descend any
+/// number of adds from the roots, collecting all 64-bit address addends; then
+/// possibly support extensions at these leaves.
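+///
+/// For example (with hypothetical CLIF values `x: i64` and `y: i32`), an
+/// address computed as `iadd(iadd(x, uextend(y)), iconst(8))` collects into
+/// the 64-bit addend list `[x]`, the 32-bit addend list `[(y, UXTW)]`, and a
+/// constant offset of 8.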
+fn collect_address_addends<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    roots: &[InsnInput],
+) -> (AddressAddend64List, AddressAddend32List, i64) {
+    let mut result32: AddressAddend32List = SmallVec::new();
+    let mut result64: AddressAddend64List = SmallVec::new();
+    let mut offset: i64 = 0;
+
+    let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();
+
+    while let Some(input) = workqueue.pop() {
+        debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
+        if let Some((op, insn)) = maybe_input_insn_multi(
+            ctx,
+            input,
+            &[
+                Opcode::Uextend,
+                Opcode::Sextend,
+                Opcode::Iadd,
+                Opcode::Iconst,
+            ],
+        ) {
+            match op {
+                Opcode::Uextend | Opcode::Sextend if ty_bits(ctx.input_ty(insn, 0)) == 32 => {
+                    let extendop = if op == Opcode::Uextend {
+                        ExtendOp::UXTW
+                    } else {
+                        ExtendOp::SXTW
+                    };
+                    let extendee_input = InsnInput { insn, input: 0 };
+                    let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
+                    result32.push((reg, extendop));
+                }
+                Opcode::Uextend | Opcode::Sextend => {
+                    let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
+                    result64.push(reg);
+                }
+                Opcode::Iadd => {
+                    for input in 0..ctx.num_inputs(insn) {
+                        let addend = InsnInput { insn, input };
+                        workqueue.push(addend);
+                    }
+                }
+                Opcode::Iconst => {
+                    let value: i64 = ctx.get_constant(insn).unwrap() as i64;
+                    offset += value;
+                }
+                _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
+            }
+        } else {
+            let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
+            result64.push(reg);
+        }
+    }
+
+    (result64, result32, offset)
+}
+
 /// Lower the address of a load or store.
 pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     elem_ty: Type,
-    addends: &[InsnInput],
+    roots: &[InsnInput],
     offset: i32,
 ) -> MemArg {
     // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
     // mul instructions (Load/StoreComplex don't include scale factors).
 
-    // Handle one reg and offset.
-    if addends.len() == 1 {
-        let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
-        return MemArg::RegOffset(reg, offset as i64, elem_ty);
-    }
+    // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
+    // extends and addition ops. We update these as we consume address
+    // components, so they represent the remaining addends not yet handled.
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+    let mut offset = args_offset + (offset as i64);
+
+    trace!(
+        "lower_address: addends64 {:?}, addends32 {:?}, offset {}",
+        addends64,
+        addends32,
+        offset
+    );
 
-    // Handle two regs and a zero offset with built-in extend, if possible.
-    if addends.len() == 2 && offset == 0 {
-        // r1, r2 (to be extended), r2_bits, is_signed
-        let mut parts: Option<(Reg, Reg, usize, bool)> = None;
-        // Handle extension of either first or second addend.
-        for i in 0..2 {
-            if let Some((op, ext_insn)) =
-                maybe_input_insn_multi(ctx, addends[i], &[Opcode::Uextend, Opcode::Sextend])
-            {
-                // Non-extended addend.
-                let r1 = put_input_in_reg(ctx, addends[1 - i], NarrowValueMode::ZeroExtend64);
-                // Extended addend.
-                let r2 = put_input_in_reg(
-                    ctx,
-                    InsnInput {
-                        insn: ext_insn,
-                        input: 0,
-                    },
-                    NarrowValueMode::None,
-                );
-                let r2_bits = ty_bits(ctx.input_ty(ext_insn, 0));
-                parts = Some((
-                    r1,
-                    r2,
-                    r2_bits,
-                    /* is_signed = */ op == Opcode::Sextend,
-                ));
-                break;
-            }
+    // First, decide what the `MemArg` will be. Take one extendee and one 64-bit
+    // reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension,
+    // or some other combination as appropriate.
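+    // Roughly in order of preference below: a 64-bit reg with an extended
+    // 32-bit index, a 64-bit reg with a small immediate offset, two 64-bit
+    // regs, or a single 64-bit reg.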
+    let memarg = if addends64.len() > 0 {
+        if addends32.len() > 0 {
+            let (reg32, extendop) = addends32.pop().unwrap();
+            let reg64 = addends64.pop().unwrap();
+            MemArg::RegExtended(reg64, reg32, extendop)
+        } else if offset > 0 && offset < 0x1000 {
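+            // 0x1000 = 4096: the offset fits in an unsigned 12-bit field.
+            // (Presumably chosen so an immediate-offset load/store form can be
+            // used when this MemArg is finalized; this bound is an assumption,
+            // not stated in the original.)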
+            let reg64 = addends64.pop().unwrap();
+            let off = offset;
+            offset = 0;
+            MemArg::RegOffset(reg64, off, elem_ty)
+        } else if addends64.len() >= 2 {
+            let reg1 = addends64.pop().unwrap();
+            let reg2 = addends64.pop().unwrap();
+            MemArg::RegReg(reg1, reg2)
+        } else {
+            let reg1 = addends64.pop().unwrap();
+            MemArg::reg(reg1)
         }
-
-        if let Some((r1, r2, r2_bits, is_signed)) = parts {
-            match (r2_bits, is_signed) {
-                (32, false) => {
-                    return MemArg::RegExtended(r1, r2, ExtendOp::UXTW);
-                }
-                (32, true) => {
-                    return MemArg::RegExtended(r1, r2, ExtendOp::SXTW);
-                }
-                _ => {}
+    } else
+    /* addends64.len() == 0 */
+    {
+        if addends32.len() > 0 {
+            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+            let (reg1, extendop) = addends32.pop().unwrap();
+            let signed = match extendop {
+                ExtendOp::SXTW => true,
+                ExtendOp::UXTW => false,
+                _ => unreachable!(),
+            };
+            ctx.emit(Inst::Extend {
+                rd: tmp,
+                rn: reg1,
+                signed,
+                from_bits: 32,
+                to_bits: 64,
+            });
+            if let Some((reg2, extendop)) = addends32.pop() {
+                MemArg::RegExtended(tmp.to_reg(), reg2, extendop)
+            } else {
+                MemArg::reg(tmp.to_reg())
             }
+        } else
+        /* addends32.len() == 0 */
+        {
+            let off_reg = ctx.alloc_tmp(RegClass::I64, I64);
+            lower_constant_u64(ctx, off_reg, offset as u64);
+            offset = 0;
+            MemArg::reg(off_reg.to_reg())
         }
-    }
+    };
 
-    // Handle two regs and a zero offset in the general case, if possible.
-    if addends.len() == 2 && offset == 0 {
-        let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
-        let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
-        return MemArg::reg_plus_reg(ra, rb);
+    // At this point, if we have any remaining components, we need to allocate a
+    // temp, replace one of the registers in the MemArg with the temp, and emit
+    // instructions to add together the remaining components. Return immediately
+    // if this is *not* the case.
+    if offset == 0 && addends32.len() == 0 && addends64.len() == 0 {
+        return memarg;
     }
 
-    // Otherwise, generate add instructions.
+    // Allocate the temp and shoehorn it into the MemArg.
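+    // `reg` below is the register displaced from the `MemArg` by the temp; it
+    // gets added back into `addr` (together with any leftover offset and
+    // addends) by the code that follows.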
     let addr = ctx.alloc_tmp(RegClass::I64, I64);
+    let (reg, memarg) = match memarg {
+        MemArg::RegExtended(r1, r2, extendop) => {
+            (r1, MemArg::RegExtended(addr.to_reg(), r2, extendop))
+        }
+        MemArg::RegOffset(r, off, ty) => (r, MemArg::RegOffset(addr.to_reg(), off, ty)),
+        MemArg::RegReg(r1, r2) => (r2, MemArg::RegReg(addr.to_reg(), r1)),
+        MemArg::UnsignedOffset(r, imm) => (r, MemArg::UnsignedOffset(addr.to_reg(), imm)),
+        _ => unreachable!(),
+    };
 
-    // Get the const into a reg.
-    lower_constant_u64(ctx, addr.clone(), offset as u64);
-
-    // Add each addend to the address.
-    for addend in addends {
-        let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
+    // If there is any offset, load that first into `addr`, and add the `reg`
+    // that we kicked out of the `MemArg`; otherwise, start with that reg.
+    if offset != 0 {
+        // If we can fit offset or -offset in an imm12, use an add-imm
+        // to combine the reg and offset. Otherwise, load value first then add.
+        if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) {
+            ctx.emit(Inst::AluRRImm12 {
+                alu_op: ALUOp::Add64,
+                rd: addr,
+                rn: reg,
+                imm12,
+            });
+        } else if let Some(imm12) = Imm12::maybe_from_u64(offset.wrapping_neg() as u64) {
+            ctx.emit(Inst::AluRRImm12 {
+                alu_op: ALUOp::Sub64,
+                rd: addr,
+                rn: reg,
+                imm12,
+            });
+        } else {
+            lower_constant_u64(ctx, addr, offset as u64);
+            ctx.emit(Inst::AluRRR {
+                alu_op: ALUOp::Add64,
+                rd: addr,
+                rn: addr.to_reg(),
+                rm: reg,
+            });
+        }
+    } else {
+        ctx.emit(Inst::gen_move(addr, reg, I64));
+    }
 
-        // In an addition, the stack register is the zero register, so divert it to another
-        // register just before doing the actual add.
+    // Now handle reg64 and reg32-extended components.
+    for reg in addends64 {
+        // If the register is the stack reg, we must move it to another reg
+        // before adding it.
         let reg = if reg == stack_reg() {
             let tmp = ctx.alloc_tmp(RegClass::I64, I64);
-            ctx.emit(Inst::Mov {
-                rd: tmp,
-                rm: stack_reg(),
-            });
+            ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
             tmp.to_reg()
         } else {
            reg
         };
-
         ctx.emit(Inst::AluRRR {
             alu_op: ALUOp::Add64,
-            rd: addr.clone(),
+            rd: addr,
+            rn: addr.to_reg(),
+            rm: reg,
+        });
+    }
+    for (reg, extendop) in addends32 {
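+        // The 32-bit addends come from `uextend`/`sextend` inputs, which are
+        // 32-bit values and so can never be the stack pointer; hence no
+        // diversion through a temp is needed here (unlike the loop above).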
+        assert!(reg != stack_reg());
+        ctx.emit(Inst::AluRRRExtend {
+            alu_op: ALUOp::Add64,
+            rd: addr,
             rn: addr.to_reg(),
-            rm: reg.clone(),
+            rm: reg,
+            extendop,
         });
     }
 
-    MemArg::reg(addr.to_reg())
+    memarg
 }
 
 pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(