From 958d496eb6cac5bdbd636efc16fa97c7f2f7604a Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Tue, 1 Jul 2025 20:29:55 +0800 Subject: [PATCH 1/2] [LoongArch] Optimize inserting fp element bitconverted from integer --- llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 5 ++++- llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 5 ++++- .../LoongArch/lasx/ir-instruction/insert-bitcast-element.ll | 4 ---- .../LoongArch/lsx/ir-instruction/insert-bitcast-element.ll | 4 ---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ff7b0f2ae3f25..66476606bb3f8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1593,7 +1593,10 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; - +def : Pat<(vector_insert v8f32:$vd, (loongarch_movgr2fr_w_la64 GPR:$rj), uimm3:$imm), + (XVINSGR2VR_W $vd, $rj, uimm3:$imm)>; +def : Pat<(vector_insert v4f64:$vd, (f64 (bitconvert i64:$rj)), uimm2:$imm), + (XVINSGR2VR_D $vd, $rj, uimm2:$imm)>; def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm), (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>; def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm), diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index d73d78083ddcd..7bbe6fc972aaf 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1791,7 +1791,10 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>; def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; - +def : Pat<(vector_insert v4f32:$vd, (loongarch_movgr2fr_w_la64 GPR:$rj), uimm2:$imm), + (VINSGR2VR_W $vd, $rj, uimm2:$imm)>; +def : Pat<(vector_insert v2f64:$vd, (f64 (bitconvert i64:$rj)), uimm1:$imm), + (VINSGR2VR_D $vd, $rj, uimm1:$imm)>; def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm), (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>; def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm), diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll index 7b2461b11f12d..b37b525981fd9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll @@ -4,8 +4,6 @@ define <8 x float> @insert_bitcast_v8f32(<8 x float> %a, i32 %b) nounwind { ; CHECK-LABEL: insert_bitcast_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movgr2fr.w $fa1, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa1 ; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 ; CHECK-NEXT: ret entry: @@ -17,8 +15,6 @@ entry: define <4 x double> @insert_bitcast_v4f64(<4 x double> %a, i64 %b) nounwind { ; CHECK-LABEL: insert_bitcast_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movgr2fr.d $fa1, $a0 -; CHECK-NEXT: movfr2gr.d $a0, $fa1 ; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll index a20d17efdfb11..c42e3013c1131 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll @@ -4,8 +4,6 @@ define <4 x float> @insert_bitcast_v4f32(<4 x float> %a, i32 %b) nounwind { ; CHECK-LABEL: insert_bitcast_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movgr2fr.w $fa1, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa1 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 ; CHECK-NEXT: ret entry: @@ -17,8 +15,6 @@ entry: define <2 x double> @insert_bitcast_v2f64(<2 x double> %a, i64 %b) nounwind { ; CHECK-LABEL: insert_bitcast_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movgr2fr.d $fa1, $a0 -; CHECK-NEXT: movfr2gr.d $a0, $fa1 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 ; CHECK-NEXT: ret entry: From 6c15292805e94d4d558d96d521698fd89a5e6512 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Sat, 5 Jul 2025 14:41:10 +0800 Subject: [PATCH 2/2] [LoongArch] Optimize bitcasting vector-extracted fp element to integer --- llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 2 +- llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 6 ++++++ llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 6 ++++++ .../lasx/ir-instruction/bitcast-extract-element.ll | 4 ---- .../LoongArch/lsx/ir-instruction/bitcast-extract-element.ll | 6 ++---- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index d5a5f17348e4b..36c3011be2b9e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file describes the baisc single-precision floating-point instructions. +// This file describes the basic single-precision floating-point instructions. // //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 66476606bb3f8..7094ba101968f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1793,6 +1793,12 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { def : RegRegStPat; } +// Bitcast float/double element extracted from vector to integer. +def : Pat<(loongarch_movfr2gr_s_la64 (f32 (vector_extract v8f32:$xj, uimm3:$imm))), + (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm)>; +def : Pat<(i64 (bitconvert (f64 (vector_extract v4f64:$xj, uimm2:$imm)))), + (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm)>; + // Vector extraction with constant index. def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 7bbe6fc972aaf..d658b039c83cd 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1993,6 +1993,12 @@ foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { def : RegRegStPat; } +// Bitcast float/double element extracted from vector to integer. +def : Pat<(loongarch_movfr2gr_s_la64 (f32 (vector_extract v4f32:$vj, uimm2:$imm))), + (VPICKVE2GR_W v4f32:$vj, uimm2:$imm)>; +def : Pat<(i64 (bitconvert (f64 (vector_extract v2f64:$vj, uimm1:$imm)))), + (VPICKVE2GR_D v2f64:$vj, uimm1:$imm)>; + // Vector extraction with constant index. def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll index 86808c7a8f014..09ce1a04d6c9d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll @@ -5,8 +5,6 @@ define i32 @bitcast_extract_v8f32(<8 x float> %a) nounwind { ; CHECK-LABEL: bitcast_extract_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 ; CHECK-NEXT: ret entry: %b = extractelement <8 x float> %a, i32 7 @@ -18,8 +16,6 @@ define i64 @bitcast_extract_v4f64(<4 x double> %a) nounwind { ; CHECK-LABEL: bitcast_extract_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: movfr2gr.d $a0, $fa0 ; CHECK-NEXT: ret entry: %b = extractelement <4 x double> %a, i32 3 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll index df4896d7ec936..9a40feb45671f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll @@ -4,8 +4,7 @@ define i32 @bitcast_extract_v4f32(<4 x float> %a) nounwind { ; CHECK-LABEL: bitcast_extract_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 ; CHECK-NEXT: ret entry: %b = extractelement <4 x float> %a, i32 3 @@ -16,8 +15,7 @@ entry: define i64 @bitcast_extract_v2f64(<2 x double> %a) nounwind { ; CHECK-LABEL: bitcast_extract_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 -; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 ; CHECK-NEXT: ret entry: %b = extractelement <2 x double> %a, i32 1