diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7786,9 +7786,11 @@
     if (ScalarBits < EltBits)
       return SDValue();

+    // If the LHS is a sign extend, try to use vwmul.
     if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) {
       // Can use vwmul.
     } else {
+      // Otherwise try to use vwmulu or vwmulsu.
       APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
       if (DAG.MaskedValueIsZero(Op1, Mask))
         IsVWMULSU = IsSignExt;
@@ -8438,6 +8440,16 @@
       return Gather;
     break;
   }
+  case RISCVISD::VMV_V_X_VL: {
+    // VMV.V.X only demands the vector element bitwidth from the scalar input.
+    unsigned ScalarSize = N->getOperand(0).getValueSizeInBits();
+    unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
+    if (ScalarSize > EltWidth)
+      if (SimplifyDemandedLowBitsHelper(0, EltWidth))
+        return SDValue(N, 0);
+
+    break;
+  }
   }

   return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

 define <2 x i16> @vwmulsu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: vwmulsu_v2i16:
@@ -726,7 +726,7 @@
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lbu a0, 0(a1)
-; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %a = load <4 x i16>, <4 x i16>* %x
   %b = load i8, i8* %y
@@ -779,7 +779,7 @@
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT:    vle32.v v9, (a0)
 ; RV64-NEXT:    lbu a0, 0(a1)
-; RV64-NEXT:    vwmulsu.vx v8, v9, a0
+; RV64-NEXT:    vwmul.vx v8, v9, a0
 ; RV64-NEXT:    ret
   %a = load <2 x i32>, <2 x i32>* %x
   %b = load i8, i8* %y
@@ -814,7 +814,7 @@
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT:    vle32.v v9, (a0)
 ; RV64-NEXT:    lhu a0, 0(a1)
-; RV64-NEXT:    vwmulsu.vx v8, v9, a0
+; RV64-NEXT:    vwmul.vx v8, v9, a0
 ; RV64-NEXT:    ret
   %a = load <2 x i32>, <2 x i32>* %x
   %b = load i16, i16* %y
@@ -881,11 +881,9 @@
 ; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    andi a0, a1, 254
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vsext.vf2 v9, v8
-; CHECK-NEXT:    vmul.vx v8, v9, a0
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
 ; CHECK-NEXT:    ret
   %a = load <8 x i8>, <8 x i8>* %x
   %b = and i16 %y, 254
@@ -911,3 +909,19 @@
   %f = mul <4 x i32> %d, %e
   ret <4 x i32> %f
 }
+
+define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(<4 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = zext i16 %y to i32
+  %c = insertelement <4 x i32> poison, i32 %b, i32 0
+  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
+  %e = sext <4 x i16> %a to <4 x i32>
+  %f = mul <4 x i32> %d, %e
+  ret <4 x i32> %f
+}
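
For reference, the new test case can also be exercised standalone; this is a minimal sketch that only reuses the RUN-line flags and IR already present in the diff above, nothing beyond what the test file itself contains:

; Run with: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs
; A zero-extended i16 scalar is splatted into a <4 x i32> vector and multiplied
; by a sign-extended <4 x i16> vector; with this patch, the autogenerated checks
; above expect a single vwmulsu.vx for the multiply.
define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(<4 x i16>* %x, i16 %y) {
  %a = load <4 x i16>, <4 x i16>* %x
  %b = zext i16 %y to i32
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}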