diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1858,8 +1858,11 @@
 
   // If the vector op is supported, but the scalar op is not, the transform may
   // not be worthwhile.
+  // Also permit a vector binary operation to be converted to a scalar binary
+  // operation that is custom lowered with an illegal type.
   EVT ScalarVT = VecVT.getScalarType();
-  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
+         isOperationCustom(Opc, ScalarVT);
 }
 
 bool RISCVTargetLowering::isOffsetFoldingLegal(
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -709,11 +709,10 @@
 define i32 @extractelt_add_nxv4i32_splat(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: extractelt_add_nxv4i32_splat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vadd.vi v8, v8, 3
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
 ; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    addiw a0, a0, 3
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -725,11 +724,11 @@
 define i32 @extractelt_sub_nxv4i32_splat(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: extractelt_sub_nxv4i32_splat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v8, 3
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 1
 ; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    subw a0, a1, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
 
 define i8 @extractelt_v16i8(ptr %x) nounwind {
 ; CHECK-LABEL: extractelt_v16i8:
@@ -615,10 +615,10 @@
 ;
 ; RV64-LABEL: extractelt_add_v4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vadd.vi v8, v8, 13
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    addiw a0, a0, 13
 ; RV64-NEXT:    ret
   %bo = add <4 x i32> %x,
   %ext = extractelement <4 x i32> %bo, i32 2
@@ -637,10 +637,11 @@
 ;
 ; RV64-LABEL: extractelt_sub_v4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vrsub.vi v8, v8, 13
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    li a1, 13
+; RV64-NEXT:    subw a0, a1, a0
 ; RV64-NEXT:    ret
   %bo = sub <4 x i32> , %x
   %ext = extractelement <4 x i32> %bo, i32 2
@@ -666,14 +667,23 @@
 ; RV32M-NEXT:    mul a0, a0, a1
 ; RV32M-NEXT:    ret
 ;
-; RV64-LABEL: extractelt_mul_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 13
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmul.vx v8, v8, a0
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    ret
+; RV64NOM-LABEL: extractelt_mul_v4i32:
+; RV64NOM:       # %bb.0:
+; RV64NOM-NEXT:    li a0, 13
+; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64NOM-NEXT:    vmul.vx v8, v8, a0
+; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
+; RV64NOM-NEXT:    vmv.x.s a0, v8
+; RV64NOM-NEXT:    ret
+;
+; RV64M-LABEL: extractelt_mul_v4i32:
+; RV64M:       # %bb.0:
+; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64M-NEXT:    vslidedown.vi v8, v8, 2
+; RV64M-NEXT:    vmv.x.s a0, v8
+; RV64M-NEXT:    li a1, 13
+; RV64M-NEXT:    mulw a0, a0, a1
+; RV64M-NEXT:    ret
   %bo = mul <4 x i32> %x,
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -715,27 +725,40 @@
 ; RV32M-NEXT:    add a0, a0, a1
 ; RV32M-NEXT:    ret
 ;
-; RV64-LABEL: extractelt_sdiv_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.v.i v9, -1
-; RV64-NEXT:    vmv.v.i v10, 0
-; RV64-NEXT:    vslideup.vi v10, v9, 3
-; RV64-NEXT:    lui a0, %hi(.LCPI38_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI38_0)
-; RV64-NEXT:    vle32.v v9, (a0)
-; RV64-NEXT:    lui a0, %hi(.LCPI38_1)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI38_1)
-; RV64-NEXT:    vle32.v v11, (a0)
-; RV64-NEXT:    vand.vv v10, v8, v10
-; RV64-NEXT:    vmulh.vv v8, v8, v9
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    vsra.vv v8, v8, v11
-; RV64-NEXT:    vsrl.vi v9, v8, 31
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    ret
+; RV64NOM-LABEL: extractelt_sdiv_v4i32:
+; RV64NOM:       # %bb.0:
+; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64NOM-NEXT:    vmv.v.i v9, -1
+; RV64NOM-NEXT:    vmv.v.i v10, 0
+; RV64NOM-NEXT:    vslideup.vi v10, v9, 3
+; RV64NOM-NEXT:    lui a0, %hi(.LCPI38_0)
+; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI38_0)
+; RV64NOM-NEXT:    vle32.v v9, (a0)
+; RV64NOM-NEXT:    lui a0, %hi(.LCPI38_1)
+; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI38_1)
+; RV64NOM-NEXT:    vle32.v v11, (a0)
+; RV64NOM-NEXT:    vand.vv v10, v8, v10
+; RV64NOM-NEXT:    vmulh.vv v8, v8, v9
+; RV64NOM-NEXT:    vadd.vv v8, v8, v10
+; RV64NOM-NEXT:    vsra.vv v8, v8, v11
+; RV64NOM-NEXT:    vsrl.vi v9, v8, 31
+; RV64NOM-NEXT:    vadd.vv v8, v8, v9
+; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
+; RV64NOM-NEXT:    vmv.x.s a0, v8
+; RV64NOM-NEXT:    ret
+;
+; RV64M-LABEL: extractelt_sdiv_v4i32:
+; RV64M:       # %bb.0:
+; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64M-NEXT:    vslidedown.vi v8, v8, 2
+; RV64M-NEXT:    vmv.x.s a0, v8
+; RV64M-NEXT:    lui a1, 322639
+; RV64M-NEXT:    addiw a1, a1, -945
+; RV64M-NEXT:    mul a0, a0, a1
+; RV64M-NEXT:    srli a1, a0, 63
+; RV64M-NEXT:    srai a0, a0, 34
+; RV64M-NEXT:    add a0, a0, a1
+; RV64M-NEXT:    ret
   %bo = sdiv <4 x i32> %x,
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -765,17 +788,31 @@
 ; RV32M-NEXT:    srli a0, a0, 2
 ; RV32M-NEXT:    ret
 ;
-; RV64-LABEL: extractelt_udiv_v4i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v8, v8, 0
-; RV64-NEXT:    lui a0, 322639
-; RV64-NEXT:    addiw a0, a0, -945
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    ret
+; RV64NOM-LABEL: extractelt_udiv_v4i32:
+; RV64NOM:       # %bb.0:
+; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64NOM-NEXT:    vsrl.vi v8, v8, 0
+; RV64NOM-NEXT:    lui a0, 322639
+; RV64NOM-NEXT:    addiw a0, a0, -945
+; RV64NOM-NEXT:    vmulhu.vx v8, v8, a0
+; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
+; RV64NOM-NEXT:    vmv.x.s a0, v8
+; RV64NOM-NEXT:    slli a0, a0, 33
+; RV64NOM-NEXT:    srli a0, a0, 35
+; RV64NOM-NEXT:    ret
+;
+; RV64M-LABEL: extractelt_udiv_v4i32:
+; RV64M:       # %bb.0:
+; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64M-NEXT:    vslidedown.vi v8, v8, 2
+; RV64M-NEXT:    vmv.x.s a0, v8
+; RV64M-NEXT:    slli a0, a0, 32
+; RV64M-NEXT:    lui a1, 322639
+; RV64M-NEXT:    addiw a1, a1, -945
+; RV64M-NEXT:    slli a1, a1, 32
+; RV64M-NEXT:    mulhu a0, a0, a1
+; RV64M-NEXT:    srli a0, a0, 34
+; RV64M-NEXT:    ret
   %bo = udiv <4 x i32> %x,
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
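Illustrative example (not part of the patch): the relaxed hook lets DAGCombine scalarize an extract of a vector binary operation whose scalar form is only custom lowered on an illegal type, as i32 add is on RV64. A minimal IR sketch of the shape exercised by the tests above, with a hypothetical function name and splat constant, under the same RUN configuration as fixed-vectors-extract.ll:

define i32 @extract_of_add_example(<4 x i32> %x) {
  ; Only lane 2 of the add is demanded, so with the relaxed hook RV64 can
  ; extract the element first (vslidedown + vmv.x.s) and add 7 as a scalar
  ; operation, as in the extractelt_add_v4i32 checks above, instead of
  ; performing the full-width vector add.
  %bo = add <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}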