diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -376,6 +376,7 @@
                            SelectionDAG &DAG) const override;
   bool shouldSinkOperands(Instruction *I,
                           SmallVectorImpl<Use *> &Ops) const override;
+  bool shouldScalarizeBinop(SDValue VecOp) const override;
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1317,6 +1317,25 @@
   return true;
 }
 
+bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+  unsigned Opc = VecOp.getOpcode();
+
+  // Assume target opcodes can't be scalarized.
+  // TODO - do we have any exceptions?
+  if (Opc >= ISD::BUILTIN_OP_END)
+    return false;
+
+  // If the vector op is not supported, try to convert to scalar.
+  EVT VecVT = VecOp.getValueType();
+  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
+    return true;
+
+  // If the vector op is supported, but the scalar op is not, the transform may
+  // not be worthwhile.
+  EVT ScalarVT = VecVT.getScalarType();
+  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
 bool RISCVTargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
   // In order to maximise the opportunity for common subexpression elimination,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define i8 @extractelt_v16i8(<16 x i8>* %x) nounwind {
 ; CHECK-LABEL: extractelt_v16i8:
@@ -613,43 +615,79 @@
 }
 
 define i32 @ext_add_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: ext_add_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vadd.vi v8, v8, 3
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: ext_add_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: addi a0, a0, 3
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ext_add_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vadd.vi v8, v8, 3
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
   %bo = add <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
 }
 
 define i32 @ext_sub_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: ext_sub_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vrsub.vi v8, v8, 2
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: ext_sub_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: li a1, 2
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ext_sub_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vrsub.vi v8, v8, 2
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
   %bo = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %x
   %ext = extractelement <4 x i32> %bo, i32 1
   ret i32 %ext
 }
 
 define i32 @ext_mul_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: ext_mul_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmul.vx v8, v8, a0
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32NOM-LABEL: ext_mul_v4i32:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: li a0, 42
+; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV32NOM-NEXT: vmul.vx v8, v8, a0
+; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32NOM-NEXT: vslidedown.vi v8, v8, 3
+; RV32NOM-NEXT: vmv.x.s a0, v8
+; RV32NOM-NEXT: ret
+;
+; RV32M-LABEL: ext_mul_v4i32:
+; RV32M: # %bb.0:
+; RV32M-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32M-NEXT: vslidedown.vi v8, v8, 3
+; RV32M-NEXT: vmv.x.s a0, v8
+; RV32M-NEXT: li a1, 42
+; RV32M-NEXT: mul a0, a0, a1
+; RV32M-NEXT: ret
+;
+; RV64-LABEL: ext_mul_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 42
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vmul.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 3
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
   %bo = mul <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
   %ext = extractelement <4 x i32> %bo, i32 3
   ret i32 %ext
@@ -667,17 +705,38 @@
 }
 
 define i32 @ext_sdiv_v4i32_constant_op0(<4 x i32> %x) {
-; CHECK-LABEL: ext_sdiv_v4i32_constant_op0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI39_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vdiv.vv v8, v9, v8
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32NOM-LABEL: ext_sdiv_v4i32_constant_op0:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: lui a0, %hi(.LCPI39_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI39_0)
+; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV32NOM-NEXT: vle32.v v9, (a0)
+; RV32NOM-NEXT: vdiv.vv v8, v9, v8
+; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32NOM-NEXT: vslidedown.vi v8, v8, 1
+; RV32NOM-NEXT: vmv.x.s a0, v8
+; RV32NOM-NEXT: ret
+;
+; RV32M-LABEL: ext_sdiv_v4i32_constant_op0:
+; RV32M: # %bb.0:
+; RV32M-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32M-NEXT: vslidedown.vi v8, v8, 1
+; RV32M-NEXT: vmv.x.s a0, v8
+; RV32M-NEXT: li a1, 2
+; RV32M-NEXT: div a0, a1, a0
+; RV32M-NEXT: ret
+;
+; RV64-LABEL: ext_sdiv_v4i32_constant_op0:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI39_0)
+; RV64-NEXT: addi a0, a0, %lo(.LCPI39_0)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vdiv.vv v8, v9, v8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
   %bo = sdiv <4 x i32> , %x
   %ext = extractelement <4 x i32> %bo, i32 1
   ret i32 %ext
@@ -695,17 +754,38 @@
 }
 
 define i32 @ext_udiv_v4i32_constant_op0(<4 x i32> %x) {
-; CHECK-LABEL: ext_udiv_v4i32_constant_op0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI41_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vdiv.vv v8, v9, v8
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32NOM-LABEL: ext_udiv_v4i32_constant_op0:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: lui a0, %hi(.LCPI41_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI41_0)
+; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV32NOM-NEXT: vle32.v v9, (a0)
+; RV32NOM-NEXT: vdiv.vv v8, v9, v8
+; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32NOM-NEXT: vslidedown.vi v8, v8, 1
+; RV32NOM-NEXT: vmv.x.s a0, v8
+; RV32NOM-NEXT: ret
+;
+; RV32M-LABEL: ext_udiv_v4i32_constant_op0:
+; RV32M: # %bb.0:
+; RV32M-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32M-NEXT: vslidedown.vi v8, v8, 1
+; RV32M-NEXT: vmv.x.s a0, v8
+; RV32M-NEXT: li a1, 2
+; RV32M-NEXT: div a0, a1, a0
+; RV32M-NEXT: ret
+;
+; RV64-LABEL: ext_udiv_v4i32_constant_op0:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a0, %hi(.LCPI41_0)
+; RV64-NEXT: addi a0, a0, %lo(.LCPI41_0)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vdiv.vv v8, v9, v8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
   %bo = sdiv <4 x i32> , %x
   %ext = extractelement <4 x i32> %bo, i32 1
   ret i32 %ext
@@ -716,11 +796,10 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI42_0)
 ; CHECK-NEXT: flw ft0, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v8, v8, ft0
 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fadd.s fa0, ft1, ft0
 ; CHECK-NEXT: ret
   %bo = fadd <4 x float> %x,
   %ext = extractelement <4 x float> %bo, i32 2
@@ -732,11 +811,10 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI43_0)
 ; CHECK-NEXT: flw ft0, %lo(.LCPI43_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfrsub.vf v8, v8, ft0
 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fsub.s fa0, ft0, ft1
 ; CHECK-NEXT: ret
   %bo = fsub <4 x float> , %x
   %ext = extractelement <4 x float> %bo, i32 1
@@ -748,11 +826,10 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI44_0)
 ; CHECK-NEXT: flw ft0, %lo(.LCPI44_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfmul.vf v8, v8, ft0
 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fmul.s fa0, ft1, ft0
 ; CHECK-NEXT: ret
   %bo = fmul <4 x float> %x,
   %ext = extractelement <4 x float> %bo, i32 3
@@ -775,11 +852,10 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
 ; CHECK-NEXT: flw ft0, %lo(.LCPI46_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfrdiv.vf v8, v8, ft0
 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
 ; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fdiv.s fa0, ft0, ft1
 ; CHECK-NEXT: ret
   %bo = fdiv <4 x float> , %x
   %ext = extractelement <4 x float> %bo, i32 1