diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 define <2 x i16> @vwmulsu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: vwmulsu_v2i16:
 ; CHECK:       # %bb.0:
@@ -681,3 +681,189 @@
   %f = mul <16 x i64> %d, %e
   ret <16 x i64> %f
 }
+
+; Tests for vwmulsu_vx when one input is a scalar splat.
+define <8 x i16> @vwmulsu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lbu a0, 0(a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vmul.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = load i8, i8* %y
+  %c = zext i8 %b to i16
+  %d = insertelement <8 x i16> poison, i16 %c, i32 0
+  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
+  %f = sext <8 x i8> %a to <8 x i16>
+  %g = mul <8 x i16> %e, %f
+  ret <8 x i16> %g
+}
+
+define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lb a0, 0(a1)
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vzext.vf2 v9, v8
+; CHECK-NEXT:    vmul.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, <8 x i8>* %x
+  %b = load i8, i8* %y
+  %c = sext i8 %b to i16
+  %d = insertelement <8 x i16> poison, i16 %c, i32 0
+  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
+  %f = zext <8 x i8> %a to <8 x i16>
+  %g = mul <8 x i16> %e, %f
+  ret <8 x i16> %g
+}
+
+define <4 x i32> @vwmulsu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    lbu a0, 0(a1)
+; CHECK-NEXT:    vwmul.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = load i8, i8* %y
+  %c = zext i8 %b to i32
+  %d = insertelement <4 x i32> poison, i32 %c, i32 0
+  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
+  %f = sext <4 x i16> %a to <4 x i32>
+  %g = mul <4 x i32> %e, %f
+  ret <4 x i32> %g
+}
+
+define <4 x i32> @vwmulsu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lhu a0, 0(a1)
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vsext.vf2 v9, v8
+; CHECK-NEXT:    vmul.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, <4 x i16>* %x
+  %b = load i16, i16* %y
+  %c = zext i16 %b to i32
+  %d = insertelement <4 x i32> poison, i32 %c, i32 0
+  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
+  %f = sext <4 x i16> %a to <4 x i32>
+  %g = mul <4 x i32> %e, %f
+  ret <4 x i32> %g
+}
+
+define <2 x i64> @vwmulsu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) {
+; RV32-LABEL: vwmulsu_vx_v2i64_i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    lbu a1, 0(a1)
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vsext.vf2 v10, v8
+; RV32-NEXT:    vmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwmulsu_vx_v2i64_i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vle32.v v9, (a0)
+; RV64-NEXT:    lbu a0, 0(a1)
+; RV64-NEXT:    vwmul.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = load <2 x i32>, <2 x i32>* %x
+  %b = load i8, i8* %y
+  %c = zext i8 %b to i64
+  %d = insertelement <2 x i64> poison, i64 %c, i64 0
+  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
+  %f = sext <2 x i32> %a to <2 x i64>
+  %g = mul <2 x i64> %e, %f
+  ret <2 x i64> %g
+}
+
+define <2 x i64> @vwmulsu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) {
+; RV32-LABEL: vwmulsu_vx_v2i64_i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    lhu a1, 0(a1)
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vsext.vf2 v10, v8
+; RV32-NEXT:    vmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwmulsu_vx_v2i64_i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vle32.v v9, (a0)
+; RV64-NEXT:    lhu a0, 0(a1)
+; RV64-NEXT:    vwmul.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = load <2 x i32>, <2 x i32>* %x
+  %b = load i16, i16* %y
+  %c = zext i16 %b to i64
+  %d = insertelement <2 x i64> poison, i64 %c, i64 0
+  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
+  %f = sext <2 x i32> %a to <2 x i64>
+  %g = mul <2 x i64> %e, %f
+  ret <2 x i64> %g
+}
+
+define <2 x i64> @vwmulsu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) {
+; RV32-LABEL: vwmulsu_vx_v2i64_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT:    vsext.vf2 v10, v8
+; RV32-NEXT:    vmul.vv v8, v9, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwmulsu_vx_v2i64_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    lwu a0, 0(a1)
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT:    vsext.vf2 v9, v8
+; RV64-NEXT:    vmul.vx v8, v9, a0
+; RV64-NEXT:    ret
+  %a = load <2 x i32>, <2 x i32>* %x
+  %b = load i32, i32* %y
+  %c = zext i32 %b to i64
+  %d = insertelement <2 x i64> poison, i64 %c, i64 0
+  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
+  %f = sext <2 x i32> %a to <2 x i64>
+  %g = mul <2 x i64> %e, %f
+  ret <2 x i64> %g
+}