diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1697,6 +1697,31 @@
   }
 }
 
+multiclass VPatNarrowShiftExtVL_WV<SDNode op, PatFrags extop, string instruction_name> {
+  foreach vtiToWti = AllWidenableIntVectors in {
+    defvar vti = vtiToWti.Vti;
+    defvar wti = vtiToWti.Wti;
+    let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                                 GetVTypePredicates<wti>.Predicates) in
+    def : Pat<
+      (vti.Vector
+        (riscv_trunc_vector_vl
+          (op (wti.Vector wti.RegClass:$rs2),
+              (wti.Vector (extop (vti.Vector vti.RegClass:$rs1),
+                                 (vti.Mask true_mask), VLOpFrag)),
+           srcvalue, (vti.Mask true_mask), VLOpFrag),
+          (vti.Mask V0), VLOpFrag)),
+      (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_MASK")
+        (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1,
+        (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+  }
+}
+
+multiclass VPatNarrowShiftVL_WV<SDNode op, string instruction_name> {
+  defm : VPatNarrowShiftExtVL_WV<op, riscv_sext_vl_oneuse, instruction_name>;
+  defm : VPatNarrowShiftExtVL_WV<op, riscv_zext_vl_oneuse, instruction_name>;
+}
+
 multiclass VPatMultiplyAddVL_VV_VX {
   foreach vti = AllIntegerVectors in {
     defvar suffix = vti.LMul.MX;
@@ -2121,6 +2146,9 @@
 
 defm : VPatNarrowShiftSplatExt_WX;
 defm : VPatNarrowShiftSplatExt_WX;
+defm : VPatNarrowShiftVL_WV<riscv_sra_vl, "PseudoVNSRA">;
+defm : VPatNarrowShiftVL_WV<riscv_srl_vl, "PseudoVNSRL">;
+
 defm : VPatBinaryNVL_WV_WX_WI;
 
 foreach vtiTowti = AllWidenableIntVectors in {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
@@ -158,6 +158,30 @@
   ret <2 x i32> %b
 }
 
+define <8 x i8> @vnsra_v8i16_v8i8_sext(<8 x i16> %x, <8 x i8> %y) {
+; CHECK-LABEL: vnsra_v8i16_v8i8_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %sext = sext <8 x i8> %y to <8 x i16>
+  %a = ashr <8 x i16> %x, %sext
+  %b = trunc <8 x i16> %a to <8 x i8>
+  ret <8 x i8> %b
+}
+
+define <8 x i8> @vnsra_v8i16_v8i8_zext(<8 x i16> %x, <8 x i8> %y) {
+; CHECK-LABEL: vnsra_v8i16_v8i8_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %zext = zext <8 x i8> %y to <8 x i16>
+  %a = ashr <8 x i16> %x, %zext
+  %b = trunc <8 x i16> %a to <8 x i8>
+  ret <8 x i8> %b
+}
+
 define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
 ; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar:
 ; CHECK:       # %bb.0:
@@ -313,3 +337,51 @@
   %b = trunc <2 x i64> %a to <2 x i32>
   ret <2 x i32> %b
 }
+
+define <4 x i16> @vnsrl_v4i32_v4i16_sext(<4 x i32> %x, <4 x i16> %y) {
+; CHECK-LABEL: vnsrl_v4i32_v4i16_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %sext = sext <4 x i16> %y to <4 x i32>
+  %a = lshr <4 x i32> %x, %sext
+  %b = trunc <4 x i32> %a to <4 x i16>
+  ret <4 x i16> %b
+}
+
+define <4 x i16> @vnsrl_v4i32_v4i16_zext(<4 x i32> %x, <4 x i16> %y) {
+; CHECK-LABEL: vnsrl_v4i32_v4i16_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %zext = zext <4 x i16> %y to <4 x i32>
+  %a = lshr <4 x i32> %x, %zext
+  %b = trunc <4 x i32> %a to <4 x i16>
+  ret <4 x i16> %b
+}
+
+define <2 x i32> @vnsrl_v2i64_v2i32_sext(<2 x i64> %x, <2 x i32> %y) {
+; CHECK-LABEL: vnsrl_v2i64_v2i32_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %sext = sext <2 x i32> %y to <2 x i64>
+  %a = lshr <2 x i64> %x, %sext
+  %b = trunc <2 x i64> %a to <2 x i32>
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @vnsrl_v2i64_v2i32_zext(<2 x i64> %x, <2 x i32> %y) {
+; CHECK-LABEL: vnsrl_v2i64_v2i32_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %zext = zext <2 x i32> %y to <2 x i64>
+  %a = lshr <2 x i64> %x, %zext
+  %b = trunc <2 x i64> %a to <2 x i32>
+  ret <2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %vr
+}
+
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  ret <vscale x 1 x i16> %vr
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %vr
+}
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsra.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  ret <vscale x 1 x i32> %vr
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %vr
+}
+
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  ret <vscale x 1 x i16> %vr
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %vr
+}
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wv v8, v8, v9
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  ret <vscale x 1 x i32> %vr
+}