This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/RISCV/
-
Target/
-
RISCV/
1/3
RISCVISelLowering.cpp
-
test/CodeGen/RISCV/rvv/
-
CodeGen/
-
RISCV/
-
rvv/
-
fixed-vectors-vwmulsu.ll

Differential D119110

[RISCV] support vwmulsu_vx when one input is a scalar splat
AbandonedPublic

Authored by Chenbing.Zheng on Feb 7 2022, 12:04 AM.

Download Raw Diff

Details

Reviewers

craig.topper
arcbbb
HsiangKai
frasercrmck
benshi001

Summary

If one input of a fixed vector multiply is a sign extend and
the other operand is a splat of a scalar, we can use vwmulsu_vx
if the scalar value has sufficient zero bits.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

Chenbing.Zheng created this revision.Feb 7 2022, 12:04 AM

Herald added subscribers: VincentWu, luke957, achieveartificialintelligence and 26 others. · View Herald TranscriptFeb 7 2022, 12:04 AM

Chenbing.Zheng requested review of this revision.Feb 7 2022, 12:04 AM

Herald added subscribers: llvm-commits, • pcwang-thead, eopXD and 2 others. · View Herald TranscriptFeb 7 2022, 12:04 AM

Harbormaster completed remote builds in B147885: Diff 406334.Feb 7 2022, 12:05 AM

Chenbing.Zheng added a comment.Feb 7 2022, 12:13 AM

This comment was removed by Chenbing.Zheng.

Chenbing.Zheng updated this revision to Diff 406716.Feb 8 2022, 12:20 AM

This comment was removed by Chenbing.Zheng.

Harbormaster completed remote builds in B148173: Diff 406716.Feb 8 2022, 1:11 AM

frasercrmck added inline comments.Feb 8 2022, 8:01 AM

llvm/lib/Target/RISCV/RISCVISelLowering.cpp
7652–7654	The description says that we're now supporting scalar splats but AFAICT this will only work for zero-extending loads? Feels like maybe the testing you're adding is too narrowly-focused and dependent on the `load`.

Chenbing.Zheng added inline comments.Feb 9 2022, 12:19 AM

llvm/lib/Target/RISCV/RISCVISelLowering.cpp
7652–7654	I agree with you. I am sorry that I have no more ideas about other cases now. May I add a 'Fix me' here?

craig.topper added inline comments.Feb 9 2022, 12:33 AM

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

7652–7654

Something like this should work.

define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i16 %b) {
  %a = load <8 x i8>, <8 x i8>* %x
  %c = and i16 %b, 255
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

deal with more tests

Harbormaster completed remote builds in B148672: Diff 407416.Feb 10 2022, 2:46 AM

craig.topper mentioned this in D119622: [RISCV] Match vwmulsu_vx with scalar splat input..Feb 11 2022, 11:43 PM

I posted an alternative version as D119622. It makes use of MaskedValueIsZero like is used for vwmulu. A new DAG combine for VMV_V_X_VL is used to remove unnecessary AND instructions.

In D119110#3316555, @craig.topper wrote:

I posted an alternative version as D119622. It makes use of MaskedValueIsZero like is used for vwmulu. A new DAG combine for VMV_V_X_VL is used to remove unnecessary AND instructions.

Thanks, it is a more generic version~

Chenbing.Zheng abandoned this revision.Feb 13 2022, 5:27 PM

craig.topper mentioned this in rGab6e02dded99: [RISCV] Match vwmulsu_vx with scalar splat input..Feb 15 2022, 8:50 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

RISCV/

RISCVISelLowering.cpp

32 lines

test/

CodeGen/

RISCV/

rvv/

fixed-vectors-vwmulsu.ll

234 lines

Diff 407416

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,556 Lines • ▼ Show 20 Lines	if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&

Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op0, VL);		Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op0, VL);
return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);		return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);
}		}

return SDValue();		return SDValue();
}		}

		// Returns true if Op can be treated as a value zero-extended from
		// NarrowSize bits. Two forms are recognised:
		//  * a zero-extending load whose bits from NarrowSize upwards (within a
		//    ScalarBits-wide value) are known to be zero, and
		//  * an AND whose second operand is a constant with exactly the low
		//    NarrowSize bits set.
		// Op is taken by reference: for the AND form with a single use it is
		// rewritten to the AND's first operand, so the caller continues with the
		// unmasked value.
		// FIXME: There are more operations that could be recognised here.
		static bool isZeroExtOp(SDValue &Op, int ScalarBits, int NarrowSize,
		                        SelectionDAG &DAG) {
		  // A zero-extending load qualifies when all bits at position NarrowSize
		  // and above are known zero.
		  if (ISD::isZEXTLoad(Op.getNode())) {
		    APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
		    return DAG.MaskedValueIsZero(Op, Mask);
		  }

		  // ISD::AND with a low-bits mask behaves like a zero-extend.
		  if (Op.getOpcode() == ISD::AND) {
		    if (auto *AndRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
		      // Test the constant with APInt::isMask instead of building the mask
		      // in an `int`: `(2 << (NarrowSize - 1)) - 1` overflows a signed int
		      // when NarrowSize is 32, so that mask could never be matched.
		      // NOTE(review): isMask requires 0 < NarrowSize <= constant width;
		      // this relies on the caller's ScalarBits/NarrowSize checks.
		      if (AndRHS->getAPIntValue().isMask(NarrowSize)) {
		        // Only drop the AND when nothing else uses its result.
		        if (Op.hasOneUse())
		          Op = Op.getOperand(0);
		        return true;
		      }
		    }
		  }

		  return false;
		}

// Try to form VWMUL, VWMULU or VWMULSU.		// Try to form VWMUL, VWMULU or VWMULSU.
// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,		static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
bool Commute) {		bool Commute) {
assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");		assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
SDValue Op0 = N->getOperand(0);		SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);		SDValue Op1 = N->getOperand(1);
if (Commute)		if (Commute)
std::swap(Op0, Op1);		std::swap(Op0, Op1);

▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	if (IsVWMULSU \|\| Op0.getOpcode() == Op1.getOpcode()) {
// widening multiply by splatting to smaller element size.		// widening multiply by splatting to smaller element size.
unsigned EltBits = VT.getScalarSizeInBits();		unsigned EltBits = VT.getScalarSizeInBits();
unsigned ScalarBits = Op1.getValueSizeInBits();		unsigned ScalarBits = Op1.getValueSizeInBits();
// Make sure we're getting all element bits from the scalar register.		// Make sure we're getting all element bits from the scalar register.
// FIXME: Support implicit sign extension of vmv.v.x?		// FIXME: Support implicit sign extension of vmv.v.x?
if (ScalarBits < EltBits)		if (ScalarBits < EltBits)
return SDValue();		return SDValue();

if (IsSignExt) {		if (IsSignExt && isZeroExtOp(Op1, ScalarBits, NarrowSize, DAG)) {
		IsVWMULSU = true;
		} else if (IsSignExt) {
		frasercrmckUnsubmitted Not Done Reply Inline Actions The description says that we're now supporting scalar splats but AFAICT this will only work for zero-extending loads? Feels like maybe the testing you're adding is too narrowly-focused and dependent on the `load`. frasercrmck: The description says that we're now supporting scalar splats but AFAICT this will only work for…
		Chenbing.ZhengAuthorUnsubmitted Done Reply Inline Actions I agree with you. I am sorry that I have no more ideas about other cases now. May I add a 'Fix me' here? Chenbing.Zheng: I agree with you. I am sorry that I have no more ideas about other cases now. May I add a…
		craig.topperUnsubmitted Not Done Reply Inline Actions Something like this should work. define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i16 %b) { %a = load <8 x i8>, <8 x i8>* %x %c = and i16 %b, 255 %d = insertelement <8 x i16> poison, i16 %c, i32 0 %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer %f = sext <8 x i8> %a to <8 x i16> %g = mul <8 x i16> %e, %f ret <8 x i16> %g } craig.topper: Something like this should work. ``` define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i16…
if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))		if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
return SDValue();		return SDValue();
} else {		} else {
APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);		APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
if (!DAG.MaskedValueIsZero(Op1, Mask))		if (!DAG.MaskedValueIsZero(Op1, Mask))
return SDValue();		return SDValue();
}		}

▲ Show 20 Lines • Show All 3,606 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \| FileCheck %s --check-prefixes=CHECK		; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \| FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \| FileCheck %s --check-prefixes=CHECK		; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \| FileCheck %s --check-prefixes=CHECK,RV64

define <2 x i16> @vwmulsu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {		define <2 x i16> @vwmulsu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
; CHECK-LABEL: vwmulsu_v2i16:		; CHECK-LABEL: vwmulsu_v2i16:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu		; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vle8.v v9, (a0)		; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)		; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwmulsu.vv v8, v10, v9		; CHECK-NEXT: vwmulsu.vv v8, v10, v9
▲ Show 20 Lines • Show All 664 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%a = load <16 x i32>, <16 x i32>* %x		%a = load <16 x i32>, <16 x i32>* %x
%b = insertelement <16 x i32> poison, i32 %y, i64 0		%b = insertelement <16 x i32> poison, i32 %y, i64 0
%c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer		%c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
%d = sext <16 x i32> %a to <16 x i64>		%d = sext <16 x i32> %a to <16 x i64>
%e = zext <16 x i32> %c to <16 x i64>		%e = zext <16 x i32> %c to <16 x i64>
%f = mul <16 x i64> %d, %e		%f = mul <16 x i64> %d, %e
ret <16 x i64> %f		ret <16 x i64> %f
}		}

		; Splat of an i8 scalar load that is zero-extended to i16, multiplied by a
		; sign-extended <8 x i8> vector. The CHECK lines expect the scalar to be
		; loaded with lbu and the multiply to be a single vwmulsu.vx.
		define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
		; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
		; CHECK-NEXT: vle8.v v9, (a0)
		; CHECK-NEXT: lbu a0, 0(a1)
		; CHECK-NEXT: vwmulsu.vx v8, v9, a0
		; CHECK-NEXT: ret
		%a = load <8 x i8>, <8 x i8>* %x
		%b = load i8, i8* %y
		%c = zext i8 %b to i16
		%d = insertelement <8 x i16> poison, i16 %c, i32 0
		%e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
		%f = sext <8 x i8> %a to <8 x i16>
		%g = mul <8 x i16> %e, %f
		ret <8 x i16> %g
		}

		; Extensions swapped relative to vwmulsu_vx_v8i16_i8: the scalar is
		; sign-extended and the vector is zero-extended. The CHECK lines expect no
		; widening multiply here: the vector is widened with vzext.vf2 and then
		; multiplied with a plain vmul.vx.
		define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(<8 x i8>* %x, i8* %y) {
		; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
		; CHECK-NEXT: vle8.v v8, (a0)
		; CHECK-NEXT: lb a0, 0(a1)
		; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
		; CHECK-NEXT: vzext.vf2 v9, v8
		; CHECK-NEXT: vmul.vx v8, v9, a0
		; CHECK-NEXT: ret
		%a = load <8 x i8>, <8 x i8>* %x
		%b = load i8, i8* %y
		%c = sext i8 %b to i16
		%d = insertelement <8 x i16> poison, i16 %c, i32 0
		%e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
		%f = zext <8 x i8> %a to <8 x i16>
		%g = mul <8 x i16> %e, %f
		ret <8 x i16> %g
		}

		; i8 scalar load zero-extended to i32 and splatted, multiplied by a
		; sign-extended <4 x i16> vector (the scalar is narrower than the vector's
		; source element type). The CHECK lines expect lbu followed by vwmulsu.vx.
		define <4 x i32> @vwmulsu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
		; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
		; CHECK-NEXT: vle16.v v9, (a0)
		; CHECK-NEXT: lbu a0, 0(a1)
		; CHECK-NEXT: vwmulsu.vx v8, v9, a0
		; CHECK-NEXT: ret
		%a = load <4 x i16>, <4 x i16>* %x
		%b = load i8, i8* %y
		%c = zext i8 %b to i32
		%d = insertelement <4 x i32> poison, i32 %c, i32 0
		%e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
		%f = sext <4 x i16> %a to <4 x i32>
		%g = mul <4 x i32> %e, %f
		ret <4 x i32> %g
		}

		; i16 scalar load zero-extended to i32 and splatted, multiplied by a
		; sign-extended <4 x i16> vector. The CHECK lines expect lhu followed by
		; vwmulsu.vx.
		define <4 x i32> @vwmulsu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
		; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
		; CHECK-NEXT: vle16.v v9, (a0)
		; CHECK-NEXT: lhu a0, 0(a1)
		; CHECK-NEXT: vwmulsu.vx v8, v9, a0
		; CHECK-NEXT: ret
		%a = load <4 x i16>, <4 x i16>* %x
		%b = load i16, i16* %y
		%c = zext i16 %b to i32
		%d = insertelement <4 x i32> poison, i32 %c, i32 0
		%e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
		%f = sext <4 x i16> %a to <4 x i32>
		%g = mul <4 x i32> %e, %f
		ret <4 x i32> %g
		}

		; i8 scalar load zero-extended to i64 and splatted, multiplied by a
		; sign-extended <2 x i32> vector. Expected output differs per target:
		; RV32 builds the i64 splat through the stack (sw/sw + vlse64.v) and uses
		; vsext.vf2 + vmul.vv, while RV64 uses lbu + vwmulsu.vx.
		define <2 x i64> @vwmulsu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) {
		; RV32-LABEL: vwmulsu_vx_v2i64_i8:
		; RV32: # %bb.0:
		; RV32-NEXT: addi sp, sp, -16
		; RV32-NEXT: .cfi_def_cfa_offset 16
		; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
		; RV32-NEXT: lbu a1, 0(a1)
		; RV32-NEXT: vle32.v v8, (a0)
		; RV32-NEXT: sw zero, 12(sp)
		; RV32-NEXT: sw a1, 8(sp)
		; RV32-NEXT: addi a0, sp, 8
		; RV32-NEXT: vlse64.v v9, (a0), zero
		; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
		; RV32-NEXT: vsext.vf2 v10, v8
		; RV32-NEXT: vmul.vv v8, v9, v10
		; RV32-NEXT: addi sp, sp, 16
		; RV32-NEXT: ret
		;
		; RV64-LABEL: vwmulsu_vx_v2i64_i8:
		; RV64: # %bb.0:
		; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
		; RV64-NEXT: vle32.v v9, (a0)
		; RV64-NEXT: lbu a0, 0(a1)
		; RV64-NEXT: vwmulsu.vx v8, v9, a0
		; RV64-NEXT: ret
		%a = load <2 x i32>, <2 x i32>* %x
		%b = load i8, i8* %y
		%c = zext i8 %b to i64
		%d = insertelement <2 x i64> poison, i64 %c, i64 0
		%e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
		%f = sext <2 x i32> %a to <2 x i64>
		%g = mul <2 x i64> %e, %f
		ret <2 x i64> %g
		}

		; i16 scalar load zero-extended to i64 and splatted, multiplied by a
		; sign-extended <2 x i32> vector. Expected output differs per target:
		; RV32 builds the i64 splat through the stack (sw/sw + vlse64.v) and uses
		; vsext.vf2 + vmul.vv, while RV64 uses lhu + vwmulsu.vx.
		define <2 x i64> @vwmulsu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) {
		; RV32-LABEL: vwmulsu_vx_v2i64_i16:
		; RV32: # %bb.0:
		; RV32-NEXT: addi sp, sp, -16
		; RV32-NEXT: .cfi_def_cfa_offset 16
		; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
		; RV32-NEXT: lhu a1, 0(a1)
		; RV32-NEXT: vle32.v v8, (a0)
		; RV32-NEXT: sw zero, 12(sp)
		; RV32-NEXT: sw a1, 8(sp)
		; RV32-NEXT: addi a0, sp, 8
		; RV32-NEXT: vlse64.v v9, (a0), zero
		; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
		; RV32-NEXT: vsext.vf2 v10, v8
		; RV32-NEXT: vmul.vv v8, v9, v10
		; RV32-NEXT: addi sp, sp, 16
		; RV32-NEXT: ret
		;
		; RV64-LABEL: vwmulsu_vx_v2i64_i16:
		; RV64: # %bb.0:
		; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
		; RV64-NEXT: vle32.v v9, (a0)
		; RV64-NEXT: lhu a0, 0(a1)
		; RV64-NEXT: vwmulsu.vx v8, v9, a0
		; RV64-NEXT: ret
		%a = load <2 x i32>, <2 x i32>* %x
		%b = load i16, i16* %y
		%c = zext i16 %b to i64
		%d = insertelement <2 x i64> poison, i64 %c, i64 0
		%e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
		%f = sext <2 x i32> %a to <2 x i64>
		%g = mul <2 x i64> %e, %f
		ret <2 x i64> %g
		}

		; i32 scalar load zero-extended to i64 and splatted, multiplied by a
		; sign-extended <2 x i32> vector. Expected output differs per target:
		; RV32 builds the i64 splat through the stack (sw/sw + vlse64.v) and uses
		; vsext.vf2 + vmul.vv, while RV64 uses lwu + vwmulsu.vx.
		define <2 x i64> @vwmulsu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) {
		; RV32-LABEL: vwmulsu_vx_v2i64_i32:
		; RV32: # %bb.0:
		; RV32-NEXT: addi sp, sp, -16
		; RV32-NEXT: .cfi_def_cfa_offset 16
		; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
		; RV32-NEXT: lw a1, 0(a1)
		; RV32-NEXT: vle32.v v8, (a0)
		; RV32-NEXT: sw zero, 12(sp)
		; RV32-NEXT: sw a1, 8(sp)
		; RV32-NEXT: addi a0, sp, 8
		; RV32-NEXT: vlse64.v v9, (a0), zero
		; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
		; RV32-NEXT: vsext.vf2 v10, v8
		; RV32-NEXT: vmul.vv v8, v9, v10
		; RV32-NEXT: addi sp, sp, 16
		; RV32-NEXT: ret
		;
		; RV64-LABEL: vwmulsu_vx_v2i64_i32:
		; RV64: # %bb.0:
		; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
		; RV64-NEXT: vle32.v v9, (a0)
		; RV64-NEXT: lwu a0, 0(a1)
		; RV64-NEXT: vwmulsu.vx v8, v9, a0
		; RV64-NEXT: ret
		%a = load <2 x i32>, <2 x i32>* %x
		%b = load i32, i32* %y
		%c = zext i32 %b to i64
		%d = insertelement <2 x i64> poison, i64 %c, i64 0
		%e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
		%f = sext <2 x i32> %a to <2 x i64>
		%g = mul <2 x i64> %e, %f
		ret <2 x i64> %g
		}

		; The scalar comes from an argument masked with `and i16 %y, 255` instead
		; of a zero-extending load. The CHECK lines expect vwmulsu.vx to take a1
		; directly, with no andi instruction emitted for the mask.
		define <8 x i16> @vwmulsu_vx_v8i16_i8_and(<8 x i8>* %x, i16 %y) {
		; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
		; CHECK-NEXT: vle8.v v9, (a0)
		; CHECK-NEXT: vwmulsu.vx v8, v9, a1
		; CHECK-NEXT: ret
		%a = load <8 x i8>, <8 x i8>* %x
		%b = and i16 %y, 255
		%c = insertelement <8 x i16> poison, i16 %b, i32 0
		%d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
		%e = sext <8 x i8> %a to <8 x i16>
		%f = mul <8 x i16> %d, %e
		ret <8 x i16> %f
		}

		; Same shape as vwmulsu_vx_v8i16_i8_and, but the mask is 254 rather than
		; 255. The CHECK lines expect the andi to be kept and the multiply to stay
		; a non-widening vsext.vf2 + vmul.vx sequence.
		define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(<8 x i8>* %x, i16 %y) {
		; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
		; CHECK-NEXT: vle8.v v8, (a0)
		; CHECK-NEXT: andi a0, a1, 254
		; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
		; CHECK-NEXT: vsext.vf2 v9, v8
		; CHECK-NEXT: vmul.vx v8, v9, a0
		; CHECK-NEXT: ret
		%a = load <8 x i8>, <8 x i8>* %x
		%b = and i16 %y, 254
		%c = insertelement <8 x i16> poison, i16 %b, i32 0
		%d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
		%e = sext <8 x i8> %a to <8 x i16>
		%f = mul <8 x i16> %d, %e
		ret <8 x i16> %f
		}

		; 16-bit variant of the AND-mask case: the scalar argument is masked with
		; `and i32 %y, 65535` and multiplied by a sign-extended <4 x i16> vector.
		; The CHECK lines expect vwmulsu.vx to take a1 directly, with no mask
		; instruction emitted.
		define <4 x i32> @vwmulsu_vx_v4i32_i16_and(<4 x i16>* %x, i32 %y) {
		; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
		; CHECK: # %bb.0:
		; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
		; CHECK-NEXT: vle16.v v9, (a0)
		; CHECK-NEXT: vwmulsu.vx v8, v9, a1
		; CHECK-NEXT: ret
		%a = load <4 x i16>, <4 x i16>* %x
		%b = and i32 %y, 65535
		%c = insertelement <4 x i32> poison, i32 %b, i32 0
		%d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
		%e = sext <4 x i16> %a to <4 x i32>
		%f = mul <4 x i32> %d, %e
		ret <4 x i32> %f
		}

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] support vwmulsu_vx when one input is a scalar splatAbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 407416

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll

[RISCV] support vwmulsu_vx when one input is a scalar splat
AbandonedPublic