diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19711,8 +19711,11 @@ // extract. SDValue Op0 = Vec.getOperand(0); SDValue Op1 = Vec.getOperand(1); + APInt SplatVal; if (isAnyConstantBuildVector(Op0, true) || - isAnyConstantBuildVector(Op1, true)) { + ISD::isConstantSplatVector(Op0.getNode(), SplatVal) || + isAnyConstantBuildVector(Op1, true) || + ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) { // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) SDLoc DL(ExtElt); @@ -19789,6 +19792,9 @@ // converts. } + if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations)) + return BO; + if (VecVT.isScalableVector()) return SDValue(); @@ -19834,9 +19840,6 @@ } } - if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations)) - return BO; - // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll @@ -487,11 +487,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI45_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI45_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fadd.s fa0, ft1, ft0 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -505,11 +504,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI46_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fsub.s fa0, ft0, ft1 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -523,11 +521,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI47_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI47_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fmul.s fa0, ft1, ft0 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -541,9 +538,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI48_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI48_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, mu +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fdiv.s fa0, ft1, ft0 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll @@ -516,11 +516,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI47_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI47_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fadd.s fa0, ft1, ft0 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -534,11 +533,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI48_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI48_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fsub.s fa0, ft0, ft1 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -552,11 +550,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI49_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI49_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fmul.s fa0, ft1, ft0 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -570,9 +567,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI50_0) ; CHECK-NEXT: flw ft0, %lo(.LCPI50_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, mu +; CHECK-NEXT: vfmv.f.s ft1, v8 +; CHECK-NEXT: fdiv.s fa0, ft1, ft0 ; CHECK-NEXT: ret %head = insertelement poison, float 3.0, i32 0 %splat = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32NOM +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32M + define signext i8 @extractelt_nxv1i8_0( %v) { ; CHECK-LABEL: extractelt_nxv1i8_0: @@ -745,11 +748,10 @@ define i32 @extractelt_add_nxv4i32_splat( %x) { ; CHECK-LABEL: extractelt_add_nxv4i32_splat: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vadd.vi v8, v8, 3 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: addi a0, a0, 3 ; CHECK-NEXT: ret %head = insertelement poison, i32 3, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -761,11 +763,11 @@ define i32 @extractelt_sub_nxv4i32_splat( %x) { ; CHECK-LABEL: extractelt_sub_nxv4i32_splat: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vrsub.vi v8, v8, 3 ; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 3 +; CHECK-NEXT: sub a0, a1, a0 ; CHECK-NEXT: ret %head = insertelement poison, i32 3, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -775,15 +777,24 @@ } define i32 @extractelt_mul_nxv4i32_splat( %x) { -; CHECK-LABEL: extractelt_mul_nxv4i32_splat: -; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 3 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; CHECK-NEXT: vmul.vx v8, v8, a0 -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32NOM-LABEL: extractelt_mul_nxv4i32_splat: +; RV32NOM: # %bb.0: +; RV32NOM-NEXT: li a0, 3 +; RV32NOM-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV32NOM-NEXT: vmul.vx v8, v8, a0 +; RV32NOM-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; RV32NOM-NEXT: vslidedown.vi v8, v8, 3 +; RV32NOM-NEXT: vmv.x.s a0, v8 +; RV32NOM-NEXT: ret +; +; RV32M-LABEL: extractelt_mul_nxv4i32_splat: +; RV32M: # %bb.0: +; RV32M-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; RV32M-NEXT: vslidedown.vi v8, v8, 3 +; RV32M-NEXT: vmv.x.s a0, v8 +; RV32M-NEXT: slli a1, a0, 1 +; RV32M-NEXT: add a0, a1, a0 +; RV32M-NEXT: ret %head = insertelement poison, i32 3, i32 0 %splat = shufflevector %head, poison, zeroinitializer %bo = mul %x, %splat @@ -792,16 +803,27 @@ } define i32 @extractelt_sdiv_nxv4i32_splat( %x) { -; CHECK-LABEL: extractelt_sdiv_nxv4i32_splat: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 349525 -; CHECK-NEXT: addi a0, a0, 1366 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsrl.vi v10, v8, 31 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32NOM-LABEL: extractelt_sdiv_nxv4i32_splat: +; RV32NOM: # %bb.0: +; RV32NOM-NEXT: lui a0, 349525 +; RV32NOM-NEXT: addi a0, a0, 1366 +; RV32NOM-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV32NOM-NEXT: vmulh.vx v8, v8, a0 +; RV32NOM-NEXT: vsrl.vi v10, v8, 31 +; RV32NOM-NEXT: vadd.vv v8, v8, v10 +; RV32NOM-NEXT: vmv.x.s a0, v8 +; RV32NOM-NEXT: ret +; +; RV32M-LABEL: extractelt_sdiv_nxv4i32_splat: +; RV32M: # %bb.0: +; RV32M-NEXT: vsetivli zero, 0, e32, m2, ta, mu +; RV32M-NEXT: vmv.x.s a0, v8 +; RV32M-NEXT: lui a1, 349525 +; RV32M-NEXT: addi a1, a1, 1366 +; RV32M-NEXT: mulh a0, a0, a1 +; RV32M-NEXT: srli a1, a0, 31 +; RV32M-NEXT: add a0, a0, a1 +; RV32M-NEXT: ret %head = insertelement poison, i32 3, i32 0 %splat = shufflevector %head, poison, zeroinitializer %bo = sdiv %x, %splat @@ -810,16 +832,27 @@ } define i32 @extractelt_udiv_nxv4i32_splat( %x) { -; CHECK-LABEL: extractelt_udiv_nxv4i32_splat: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 349525 -; CHECK-NEXT: addi a0, a0, 1366 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsrl.vi v10, v8, 31 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: ret +; RV32NOM-LABEL: extractelt_udiv_nxv4i32_splat: +; RV32NOM: # %bb.0: +; RV32NOM-NEXT: lui a0, 349525 +; RV32NOM-NEXT: addi a0, a0, 1366 +; RV32NOM-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV32NOM-NEXT: vmulh.vx v8, v8, a0 +; RV32NOM-NEXT: vsrl.vi v10, v8, 31 +; RV32NOM-NEXT: vadd.vv v8, v8, v10 +; RV32NOM-NEXT: vmv.x.s a0, v8 +; RV32NOM-NEXT: ret +; +; RV32M-LABEL: extractelt_udiv_nxv4i32_splat: +; RV32M: # %bb.0: +; RV32M-NEXT: vsetivli zero, 0, e32, m2, ta, mu +; RV32M-NEXT: vmv.x.s a0, v8 +; RV32M-NEXT: lui a1, 349525 +; RV32M-NEXT: addi a1, a1, 1366 +; RV32M-NEXT: mulh a0, a0, a1 +; RV32M-NEXT: srli a1, a0, 31 +; RV32M-NEXT: add a0, a0, a1 +; RV32M-NEXT: ret %head = insertelement poison, i32 3, i32 0 %splat = shufflevector %head, poison, zeroinitializer %bo = sdiv %x, %splat