[RISCV] Select Zvbb vwsll.{vv,vx,vi} for riscv_shl_vl of a zext_oneuse operand,
and add vp.shl codegen tests with and without +experimental-zvbb.

NOTE(review): the copy this patch was recovered from had every <...> span
(TableGen template arguments, LLVM IR vector types) stripped and all line
breaks collapsed. They are reconstructed below. The IR types follow from the
intrinsic name mangling and the SEW/LMUL in the CHECK lines. The three
`defm : VPatUnaryVL_V<...>` context lines are ASSUMED to be the
ctlz/cttz/ctpop entries -- confirm against the tree before applying.
The TableGen hunk also gains six `//` comment lines (hunk count 38 -> 44).

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -399,3 +399,44 @@
 defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">;
 defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">;
 defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">;
+
+// Select the masked PseudoVWSLL_{VV,VX,VI} pseudos for a riscv_shl_vl whose
+// shifted operand matches zext_oneuse of a narrow vector. One set of patterns
+// is emitted per (Vti, Wti) pair in AllWidenableIntVectors.
+foreach vtiToWti = AllWidenableIntVectors in {
+  defvar vti = vtiToWti.Vti;
+  defvar wti = vtiToWti.Wti;
+  let Predicates = !listconcat([HasStdExtZvbb],
+                               GetVTypePredicates<vti>.Predicates,
+                               GetVTypePredicates<wti>.Predicates) in {
+    // vwsll.vv: shift amount matches ext_oneuse of a narrow vector.
+    def : Pat<(riscv_shl_vl
+                 (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+                 (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1))),
+                 (wti.Vector wti.RegClass:$merge),
+                 (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK")
+                 wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+    // vwsll.vx: shift amount is a splat matched by Low8BitsSplatPat.
+    def : Pat<(riscv_shl_vl
+                 (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+                 (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))),
+                 (wti.Vector wti.RegClass:$merge),
+                 (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK")
+                 wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+    // vwsll.vi: shift amount is a splat matched by SplatPat_uimm5.
+    def : Pat<(riscv_shl_vl
+                 (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))),
+                 (wti.Vector (SplatPat_uimm5 uimm5:$rs1)),
+                 (wti.Vector wti.RegClass:$merge),
+                 (vti.Mask V0), VLOpFrag),
+              (!cast<Instruction>("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK")
+                 wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+  }
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll
@@ -0,0 +1,760 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64
+
+; ==============================================================================
+; i32 -> i64
+; ==============================================================================
+
+declare <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vwsll_vv_nxv2i64_sext(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vv_nxv2i64_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vv_nxv2i64_zext(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vv_nxv2i64_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64(<vscale x 2 x i32> %a, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-RV32-LABEL: vwsll_vx_i64_nxv2i64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-RV32-NEXT:    vsll.vx v8, v10, a0, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: vwsll_vx_i64_nxv2i64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
+; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-RV64-NEXT:    vsll.vx v8, v10, a0, v0.t
+; CHECK-RV64-NEXT:    ret
+;
+; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv2i64:
+; CHECK-ZVBB32:       # %bb.0:
+; CHECK-ZVBB32-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-ZVBB32-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB32-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB32-NEXT:    ret
+;
+; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv2i64:
+; CHECK-ZVBB64:       # %bb.0:
+; CHECK-ZVBB64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB64-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB64-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT:    ret
+  %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %splat, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_sext(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = sext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_zext(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = zext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_sext(<vscale x 2 x i32> %a, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_zext(<vscale x 2 x i32> %a, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i16_nxv2i64_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_sext(<vscale x 2 x i32> %a, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_zext(<vscale x 2 x i32> %a, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %y = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vi_nxv2i64(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vi_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vi v10, v8, 2, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %z = call <vscale x 2 x i64> @llvm.vp.shl.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 2, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i64> %z
+}
+
+; ==============================================================================
+; i16 -> i32
+; ==============================================================================
+
+declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vwsll_vv_nxv4i32_sext(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vv_nxv4i32_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vv_nxv4i32_zext(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vv_nxv4i32_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv4i32_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vx_i64_nxv4i32(<vscale x 4 x i16> %a, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-RV32-LABEL: vwsll_vx_i64_nxv4i32:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT:    sw a1, 12(sp)
+; CHECK-RV32-NEXT:    sw a0, 8(sp)
+; CHECK-RV32-NEXT:    addi a0, sp, 8
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV32-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-RV32-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: vwsll_vx_i64_nxv4i32:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; CHECK-RV64-NEXT:    vmv.v.x v12, a0
+; CHECK-RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV64-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-RV64-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-RV64-NEXT:    ret
+;
+; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv4i32:
+; CHECK-ZVBB32:       # %bb.0:
+; CHECK-ZVBB32-NEXT:    addi sp, sp, -16
+; CHECK-ZVBB32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ZVBB32-NEXT:    sw a1, 12(sp)
+; CHECK-ZVBB32-NEXT:    sw a0, 8(sp)
+; CHECK-ZVBB32-NEXT:    addi a0, sp, 8
+; CHECK-ZVBB32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK-ZVBB32-NEXT:    vzext.vf2 v10, v8
+; CHECK-ZVBB32-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-ZVBB32-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-ZVBB32-NEXT:    addi sp, sp, 16
+; CHECK-ZVBB32-NEXT:    ret
+;
+; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv4i32:
+; CHECK-ZVBB64:       # %bb.0:
+; CHECK-ZVBB64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-ZVBB64-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB64-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT:    ret
+  %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = trunc <vscale x 4 x i64> %splat to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vx_i32_nxv4i32(<vscale x 4 x i16> %a, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i32_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %splat, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_sext(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = sext <vscale x 4 x i16> %splat to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vx_i16_nxv4i32_zext(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv4i32_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = zext <vscale x 4 x i16> %splat to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_sext(<vscale x 4 x i16> %a, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = sext <vscale x 4 x i8> %splat to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vx_i8_nxv4i32_zext(<vscale x 4 x i16> %a, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv4i32_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %y = zext <vscale x 4 x i8> %splat to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+define <vscale x 4 x i32> @vwsll_vi_nxv4i32(<vscale x 4 x i16> %a, <vscale x 4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vi_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vi_nxv4i32:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vi v10, v8, 2, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %z = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> shufflevector(<vscale x 4 x i32> insertelement(<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> %m, i32 %vl)
+  ret <vscale x 4 x i32> %z
+}
+
+
+; ==============================================================================
+; i8 -> i16
+; ==============================================================================
+
+declare <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vwsll_vv_nxv8i16_sext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vv_nxv8i16_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %y = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vv_nxv8i16_zext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vv_nxv8i16_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv8i16_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vv v10, v8, v9, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %y = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vx_i64_nxv8i16(<vscale x 8 x i8> %a, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-RV32-LABEL: vwsll_vx_i64_nxv8i16:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT:    sw a1, 12(sp)
+; CHECK-RV32-NEXT:    sw a0, 8(sp)
+; CHECK-RV32-NEXT:    addi a0, sp, 8
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v16, (a0), zero
+; CHECK-RV32-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-RV32-NEXT:    vnsrl.wi v12, v16, 0
+; CHECK-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-RV32-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-RV32-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: vwsll_vx_i64_nxv8i16:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-RV64-NEXT:    vmv.v.x v16, a0
+; CHECK-RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-RV64-NEXT:    vzext.vf2 v10, v8
+; CHECK-RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-RV64-NEXT:    vnsrl.wi v12, v16, 0
+; CHECK-RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-RV64-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-RV64-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-RV64-NEXT:    ret
+;
+; CHECK-ZVBB32-LABEL: vwsll_vx_i64_nxv8i16:
+; CHECK-ZVBB32:       # %bb.0:
+; CHECK-ZVBB32-NEXT:    addi sp, sp, -16
+; CHECK-ZVBB32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ZVBB32-NEXT:    sw a1, 12(sp)
+; CHECK-ZVBB32-NEXT:    sw a0, 8(sp)
+; CHECK-ZVBB32-NEXT:    addi a0, sp, 8
+; CHECK-ZVBB32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vlse64.v v16, (a0), zero
+; CHECK-ZVBB32-NEXT:    vzext.vf2 v10, v8
+; CHECK-ZVBB32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-ZVBB32-NEXT:    vnsrl.wi v12, v16, 0
+; CHECK-ZVBB32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-ZVBB32-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-ZVBB32-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-ZVBB32-NEXT:    addi sp, sp, 16
+; CHECK-ZVBB32-NEXT:    ret
+;
+; CHECK-ZVBB64-LABEL: vwsll_vx_i64_nxv8i16:
+; CHECK-ZVBB64:       # %bb.0:
+; CHECK-ZVBB64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-ZVBB64-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB64-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB64-NEXT:    ret
+  %head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %y = trunc <vscale x 8 x i64> %splat to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vx_i32_nxv8i16(<vscale x 8 x i8> %a, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i32_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %y = trunc <vscale x 8 x i32> %splat to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vx_i16_nxv8i16(<vscale x 8 x i8> %a, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i16_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vx v8, v10, a0, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %splat, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_sext(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_sext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %y = sext <vscale x 8 x i8> %splat to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vx_i8_nxv8i16_zext(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v12, v9
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv8i16_zext:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vx v10, v8, a0, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %y = zext <vscale x 8 x i8> %splat to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}
+
+define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a, <vscale x 8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vwsll_vi_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v10, 2, v0.t
+; CHECK-NEXT:    ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vi_nxv8i16:
+; CHECK-ZVBB:       # %bb.0:
+; CHECK-ZVBB-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vwsll.vi v10, v8, 2, v0.t
+; CHECK-ZVBB-NEXT:    vmv2r.v v8, v10
+; CHECK-ZVBB-NEXT:    ret
+  %x = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %z = call <vscale x 8 x i16> @llvm.vp.shl.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> shufflevector(<vscale x 8 x i16> insertelement(<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> %m, i32 %vl)
+  ret <vscale x 8 x i16> %z
+}