diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -231,6 +231,8 @@ SREM_VL, SRA_VL, SRL_VL, + ROTL_VL, + ROTR_VL, SUB_VL, UDIV_VL, UREM_VL, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1061,7 +1061,8 @@ if (Subtarget.hasStdExtZvbb()) { setOperationAction({ISD::BITREVERSE, ISD::BSWAP, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ, - ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, + ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP, ISD::ROTL, + ISD::ROTR}, VT, Custom); } else { // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the @@ -4825,6 +4826,8 @@ OP_CASE(SHL) OP_CASE(SRA) OP_CASE(SRL) + OP_CASE(ROTL) + OP_CASE(ROTR) OP_CASE(BSWAP) OP_CASE(CTTZ) OP_CASE(CTLZ) @@ -4939,7 +4942,7 @@ Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 122 && + 124 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -4963,7 +4966,7 @@ Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 122 && + 124 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -5015,6 +5018,10 @@ return lowerShiftRightParts(Op, DAG, false); case ISD::ROTL: case ISD::ROTR: + if (Op.getValueType().isFixedLengthVector()) { + assert(Subtarget.hasStdExtZvbb()); + return lowerToScalableOp(Op, DAG); + } assert(Subtarget.hasVendorXTHeadBb() && !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && "Unexpected custom legalization"); @@ -16415,6 +16422,8 @@ NODE_NAME_CASE(SREM_VL) NODE_NAME_CASE(SRA_VL) NODE_NAME_CASE(SRL_VL) + 
NODE_NAME_CASE(ROTL_VL) + NODE_NAME_CASE(ROTR_VL) NODE_NAME_CASE(SUB_VL) NODE_NAME_CASE(UDIV_VL) NODE_NAME_CASE(UREM_VL) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -98,6 +98,8 @@ def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>; def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>; def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>; +def riscv_rotl_vl : SDNode<"RISCVISD::ROTL_VL", SDT_RISCVIntBinOp_VL>; +def riscv_rotr_vl : SDNode<"RISCVISD::ROTR_VL", SDT_RISCVIntBinOp_VL>; def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -584,6 +584,25 @@ defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">; defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">; +defm : VPatBinaryVL_VV_VX<riscv_rotl_vl, "PseudoVROL">; +// Although there is no vrol.vi, an immediate rotate left can be achieved by +// negating the immediate in vror.vi +foreach vti = AllIntegerVectors in { + let Predicates = !listconcat([HasStdExtZvbb], + GetVTypePredicates<vti>.Predicates) in { + def : Pat<(riscv_rotl_vl vti.RegClass:$rs2, + (vti.Vector (SplatPat_uimm6 uimm6:$rs1)), + (vti.Vector vti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, + vti.RegClass:$rs2, + (!cast<SDNodeXForm>("InvRot" # vti.SEW # "Imm") uimm6:$rs1), + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} +defm : VPatBinaryVL_VV_VX_VI<riscv_rotr_vl, "PseudoVROR", uimm6>; + foreach vtiToWti = AllWidenableIntVectors in { defvar vti = vtiToWti.Vti; defvar wti
= vtiToWti.Wti; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll @@ -0,0 +1,1275 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64 + +declare <1 x i8> @llvm.fshl.v1i8(<1 x i8>, <1 x i8>, <1 x i8>) + +define <1 x i8> @vrol_vv_v1i8(<1 x i8> %a, <1 x i8> %b) { +; CHECK-LABEL: vrol_vv_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i8> @llvm.fshl.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> %b) + ret <1 x i8> %x +} + +define <1 x i8> @vrol_vx_v1i8(<1 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; 
CHECK-ZVBB-LABEL: vrol_vx_v1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <1 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <1 x i8> %b.head, <1 x i8> poison, <1 x i32> zeroinitializer + %x = call <1 x i8> @llvm.fshl.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> %b.splat) + ret <1 x i8> %x +} + +declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) + +define <2 x i8> @vrol_vv_v2i8(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: vrol_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> %b) + ret <2 x i8> %x +} + +define <2 x i8> @vrol_vx_v2i8(<2 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <2 x i8> %b.head, <2 x i8> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> %b.splat) + ret <2 x i8> %x +} + +declare <4 x i8> 
@llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) + +define <4 x i8> @vrol_vv_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: vrol_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %x +} + +define <4 x i8> @vrol_vx_v4i8(<4 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <4 x i8> %b.head, <4 x i8> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> %b.splat) + ret <4 x i8> %x +} + +declare <8 x i8> @llvm.fshl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) + +define <8 x i8> @vrol_vv_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: vrol_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; 
CHECK-ZVBB-LABEL: vrol_vv_v8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %x +} + +define <8 x i8> @vrol_vx_v8i8(<8 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <8 x i8> %b.head, <8 x i8> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b.splat) + ret <8 x i8> %x +} + +declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @vrol_vv_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vrol_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %x +} + +define <16 x i8> @vrol_vx_v16i8(<16 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, 
ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <16 x i8> %b.head, <16 x i8> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %b.splat) + ret <16 x i8> %x +} + +declare <32 x i8> @llvm.fshl.v32i8(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @vrol_vv_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: vrol_vv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vand.vi v12, v10, 7 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> %b) + ret <32 x i8> %x +} + +define <32 x i8> @vrol_vx_v32i8(<32 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vand.vi v12, v10, 7 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v32i8: +; CHECK-ZVBB: # 
%bb.0: +; CHECK-ZVBB-NEXT: li a1, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <32 x i8> %b.head, <32 x i8> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> %b.splat) + ret <32 x i8> %x +} + +declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>) + +define <64 x i8> @vrol_vv_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK-LABEL: vrol_vv_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vand.vi v16, v12, 7 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 64 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> %b) + ret <64 x i8> %x +} + +define <64 x i8> @vrol_vx_v64i8(<64 x i8> %a, i8 %b) { +; CHECK-LABEL: vrol_vx_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vand.vi v16, v12, 7 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 7 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a1, 64 +; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <64 x i8> %b.head, <64 x i8> poison, <64 x i32> 
zeroinitializer + %x = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> %b.splat) + ret <64 x i8> %x +} + +declare <1 x i16> @llvm.fshl.v1i16(<1 x i16>, <1 x i16>, <1 x i16>) + +define <1 x i16> @vrol_vv_v1i16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: vrol_vv_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i16> @llvm.fshl.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> %b) + ret <1 x i16> %x +} + +define <1 x i16> @vrol_vx_v1i16(<1 x i16> %a, i16 %b) { +; CHECK-LABEL: vrol_vx_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <1 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <1 x i16> %b.head, <1 x i16> poison, <1 x i32> zeroinitializer + %x = call <1 x i16> @llvm.fshl.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> %b.splat) + ret <1 x i16> %x +} + +declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) + +define <2 x i16> @vrol_vv_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: vrol_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; 
CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %x +} + +define <2 x i16> @vrol_vx_v2i16(<2 x i16> %a, i16 %b) { +; CHECK-LABEL: vrol_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <2 x i16> %b.head, <2 x i16> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> %b.splat) + ret <2 x i16> %x +} + +declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) + +define <4 x i16> @vrol_vv_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: vrol_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i16> 
@llvm.fshl.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %x +} + +define <4 x i16> @vrol_vx_v4i16(<4 x i16> %a, i16 %b) { +; CHECK-LABEL: vrol_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <4 x i16> %b.head, <4 x i16> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> %b.splat) + ret <4 x i16> %x +} + +declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) + +define <8 x i16> @vrol_vv_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vrol_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %x +} + +define <8 x i16> @vrol_vx_v8i16(<8 x i16> %a, i16 %b) { +; CHECK-LABEL: vrol_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, 
v9, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <8 x i16> %b.head, <8 x i16> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %b.splat) + ret <8 x i16> %x +} + +declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) + +define <16 x i16> @vrol_vv_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: vrol_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vand.vi v12, v10, 15 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> %b) + ret <16 x i16> %x +} + +define <16 x i16> @vrol_vx_v16i16(<16 x i16> %a, i16 %b) { +; CHECK-LABEL: vrol_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vand.vi v12, v10, 15 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <16 
x i16> %b.head, <16 x i16> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> %b.splat) + ret <16 x i16> %x +} + +declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>) + +define <32 x i16> @vrol_vv_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK-LABEL: vrol_vv_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vand.vi v16, v12, 15 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> %b) + ret <32 x i16> %x +} + +define <32 x i16> @vrol_vx_v32i16(<32 x i16> %a, i16 %b) { +; CHECK-LABEL: vrol_vx_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vand.vi v16, v12, 15 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 15 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a1, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <32 x i16> %b.head, <32 x i16> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> %b.splat) + ret <32 x i16> %x +} + +declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) 
+ +define <1 x i32> @vrol_vv_v1i32(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: vrol_vv_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> %b) + ret <1 x i32> %x +} + +define <1 x i32> @vrol_vx_v1i32(<1 x i32> %a, i32 %b) { +; CHECK-LABEL: vrol_vx_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <1 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <1 x i32> %b.head, <1 x i32> poison, <1 x i32> zeroinitializer + %x = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> %b.splat) + ret <1 x i32> %x +} + +declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define <2 x i32> @vrol_vv_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vrol_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: 
vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %x +} + +define <2 x i32> @vrol_vx_v2i32(<2 x i32> %a, i32 %b) { +; CHECK-LABEL: vrol_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <2 x i32> %b.head, <2 x i32> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> %b.splat) + ret <2 x i32> %x +} + +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32> @vrol_vv_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vrol_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %x 
+} + +define <4 x i32> @vrol_vx_v4i32(<4 x i32> %a, i32 %b) { +; CHECK-LABEL: vrol_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <4 x i32> %b.head, <4 x i32> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %b.splat) + ret <4 x i32> %x +} + +declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32> @vrol_vv_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: vrol_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> %b) + ret <8 x i32> %x +} + +define <8 x i32> @vrol_vx_v8i32(<8 x i32> %a, i32 %b) { +; CHECK-LABEL: vrol_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx 
v10, v10, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <8 x i32> %b.head, <8 x i32> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> %b.splat) + ret <8 x i32> %x +} + +declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) + +define <16 x i32> @vrol_vv_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: vrol_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> %b) + ret <16 x i32> %x +} + +define <16 x i32> @vrol_vx_v16i32(<16 x i32> %a, i32 %b) { +; CHECK-LABEL: vrol_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vx_v16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <16 x 
i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <16 x i32> %b.head, <16 x i32> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> %b.splat) + ret <16 x i32> %x +} + +declare <1 x i64> @llvm.fshl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) + +define <1 x i64> @vrol_vv_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-RV32-LABEL: vrol_vv_v1i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v11, v10, v9 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-RV32-NEXT: vmv.s.x v10, a0 +; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vv v11, v11, v10 +; CHECK-RV32-NEXT: vsrl.vv v11, v8, v11 +; CHECK-RV32-NEXT: vand.vv v9, v9, v10 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v8, v11 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vv_v1i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %x +} + +define <1 x i64> @vrol_vx_v1i64(<1 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vrol_vx_v1i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: 
addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v11, v10, v9 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-RV32-NEXT: vmv.s.x v10, a0 +; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vv v11, v11, v10 +; CHECK-RV32-NEXT: vsrl.vv v11, v8, v11 +; CHECK-RV32-NEXT: vand.vv v9, v9, v10 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v8, v11 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vx_v1i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v9, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vrol_vx_v1i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero +; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vrol_vx_v1i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <1 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <1 x i64> %b.head, <1 x i64> poison, <1 x i32> zeroinitializer + %x = call <1 
x i64> @llvm.fshl.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> %b.splat) + ret <1 x i64> %x +} + +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @vrol_vv_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-RV32-LABEL: vrol_vv_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vx v10, v9, a0 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v11, 0 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v9, v11, v9 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vv_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %x +} + +define <2 x i64> @vrol_vx_v2i64(<2 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vrol_vx_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v10, v9, a0 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV32-NEXT: 
vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v11, 0 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v9, v11, v9 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vx_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v9, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vrol_vx_v2i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero +; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v9 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vrol_vx_v2i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <2 x i64> %b.head, <2 x i64> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %b.splat) + ret <2 x i64> %x +} + +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @vrol_vv_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-RV32-LABEL: vrol_vv_v4i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: 
vand.vx v12, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v14, 0 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vsub.vv v10, v14, v10 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vv_v4i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vand.vx v12, v10, a0 +; CHECK-RV64-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 +; CHECK-RV64-NEXT: vand.vx v10, v10, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV64-NEXT: vor.vv v8, v12, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> %b) + ret <4 x i64> %x +} + +define <4 x i64> @vrol_vx_v4i64(<4 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vrol_vx_v4i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v12, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v14, 0 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vsub.vv v10, v14, v10 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: 
vrol_vx_v4i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v10, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v12, v10, a0 +; CHECK-RV64-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 +; CHECK-RV64-NEXT: vand.vx v10, v10, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV64-NEXT: vor.vv v8, v12, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vrol_vx_v4i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v10, (a0), zero +; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v10 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vrol_vx_v4i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <4 x i64> %b.head, <4 x i64> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> %b.splat) + ret <4 x i64> %x +} + +declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) + +define <8 x i64> @vrol_vv_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK-RV32-LABEL: vrol_vv_v8i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vand.vx v16, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v16, v8, v16 +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v20, 0 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vsub.vv v12, v20, v12 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv 
v8, v16, v8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vv_v8i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV64-NEXT: vand.vx v16, v12, a0 +; CHECK-RV64-NEXT: vsll.vv v16, v8, v16 +; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 +; CHECK-RV64-NEXT: vand.vx v12, v12, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV64-NEXT: vor.vv v8, v16, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vrol_vv_v8i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vrol.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> %b) + ret <8 x i64> %x +} + +define <8 x i64> @vrol_vx_v8i64(<8 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vrol_vx_v8i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v16, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v16, v8, v16 +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v20, 0 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vsub.vv v12, v20, v12 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vrol_vx_v8i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v12, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v16, v12, a0 +; CHECK-RV64-NEXT: vsll.vv v16, v8, v16 +; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 +; CHECK-RV64-NEXT: vand.vx v12, v12, a0 +; CHECK-RV64-NEXT: vsrl.vv v8, v8, v12 
+; CHECK-RV64-NEXT: vor.vv v8, v16, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vrol_vx_v8i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v12, (a0), zero +; CHECK-ZVBB32-NEXT: vrol.vv v8, v8, v12 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vrol_vx_v8i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB64-NEXT: vrol.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <8 x i64> %b.head, <8 x i64> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> %b.splat) + ret <8 x i64> %x +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll @@ -0,0 +1,2241 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64 + +declare <1 x i8> @llvm.fshr.v1i8(<1 x i8>, <1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.fshl.v1i8(<1 x i8>, <1 x i8>, <1 x i8>) + +define <1 x i8> @vror_vv_v1i8(<1 x i8> %a, <1 x 
i8> %b) { +; CHECK-LABEL: vror_vv_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i8> @llvm.fshr.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> %b) + ret <1 x i8> %x +} + +define <1 x i8> @vror_vx_v1i8(<1 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <1 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <1 x i8> %b.head, <1 x i8> poison, <1 x i32> zeroinitializer + %x = call <1 x i8> @llvm.fshr.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> %b.splat) + ret <1 x i8> %x +} + +define <1 x i8> @vror_vi_v1i8(<1 x i8> %a) { +; CHECK-LABEL: vror_vi_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i8> @llvm.fshr.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> shufflevector(<1 x i8> insertelement(<1 x 
i8> poison, i8 1, i32 0), <1 x i8> poison, <1 x i32> zeroinitializer)) + ret <1 x i8> %x +} + +define <1 x i8> @vror_vi_rotl_v1i8(<1 x i8> %a) { +; CHECK-LABEL: vror_vi_rotl_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v1i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i8> @llvm.fshl.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> shufflevector(<1 x i8> insertelement(<1 x i8> poison, i8 1, i32 0), <1 x i8> poison, <1 x i32> zeroinitializer)) + ret <1 x i8> %x +} + +declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) +declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) + +define <2 x i8> @vror_vv_v2i8(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: vror_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> %b) + ret <2 x i8> %x +} + +define <2 x i8> @vror_vx_v2i8(<2 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v2i8: +; 
CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <2 x i8> %b.head, <2 x i8> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> %b.splat) + ret <2 x i8> %x +} + +define <2 x i8> @vror_vi_v2i8(<2 x i8> %a) { +; CHECK-LABEL: vror_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> shufflevector(<2 x i8> insertelement(<2 x i8> poison, i8 1, i32 0), <2 x i8> poison, <2 x i32> zeroinitializer)) + ret <2 x i8> %x +} + +define <2 x i8> @vror_vi_rotl_v2i8(<2 x i8> %a) { +; CHECK-LABEL: vror_vi_rotl_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> shufflevector(<2 x i8> insertelement(<2 x i8> poison, i8 1, i32 0), <2 x i8> poison, <2 x i32> zeroinitializer)) + ret <2 x i8> %x +} + +declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) +declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) + +define <4 x i8> @vror_vv_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: vror_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; 
CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %x +} + +define <4 x i8> @vror_vx_v4i8(<4 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <4 x i8> %b.head, <4 x i8> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> %b.splat) + ret <4 x i8> %x +} + +define <4 x i8> @vror_vi_v4i8(<4 x i8> %a) { +; CHECK-LABEL: vror_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> shufflevector(<4 x i8> insertelement(<4 x i8> poison, i8 1, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer)) + ret <4 x i8> %x +} + +define <4 x i8> 
@vror_vi_rotl_v4i8(<4 x i8> %a) { +; CHECK-LABEL: vror_vi_rotl_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> shufflevector(<4 x i8> insertelement(<4 x i8> poison, i8 1, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer)) + ret <4 x i8> %x +} + +declare <8 x i8> @llvm.fshr.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) +declare <8 x i8> @llvm.fshl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) + +define <8 x i8> @vror_vv_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: vror_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i8> @llvm.fshr.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %x +} + +define <8 x i8> @vror_vx_v8i8(<8 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; 
CHECK-ZVBB-NEXT: ret + %b.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <8 x i8> %b.head, <8 x i8> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.fshr.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b.splat) + ret <8 x i8> %x +} + +define <8 x i8> @vror_vi_v8i8(<8 x i8> %a) { +; CHECK-LABEL: vror_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i8> @llvm.fshr.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> shufflevector(<8 x i8> insertelement(<8 x i8> poison, i8 1, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)) + ret <8 x i8> %x +} + +define <8 x i8> @vror_vi_rotl_v8i8(<8 x i8> %a) { +; CHECK-LABEL: vror_vi_rotl_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v8i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> shufflevector(<8 x i8> insertelement(<8 x i8> poison, i8 1, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)) + ret <8 x i8> %x +} + +declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @vror_vv_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vror_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; 
CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %x +} + +define <16 x i8> @vror_vx_v16i8(<16 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 7 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 7 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <16 x i8> %b.head, <16 x i8> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %b.splat) + ret <16 x i8> %x +} + +define <16 x i8> @vror_vi_v16i8(<16 x i8> %a) { +; CHECK-LABEL: vror_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> shufflevector(<16 x i8> insertelement(<16 x i8> poison, i8 1, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)) + ret <16 x i8> %x +} + +define <16 x i8> @vror_vi_rotl_v16i8(<16 x i8> %a) { +; CHECK-LABEL: 
vror_vi_rotl_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v16i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> shufflevector(<16 x i8> insertelement(<16 x i8> poison, i8 1, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)) + ret <16 x i8> %x +} + +declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>) +declare <32 x i8> @llvm.fshl.v32i8(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @vror_vv_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: vror_vv_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vand.vi v12, v10, 7 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 7 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> %b) + ret <32 x i8> %x +} + +define <32 x i8> @vror_vx_v32i8(<32 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vand.vi v12, v10, 7 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 7 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v32i8: +; CHECK-ZVBB: # %bb.0: +; 
CHECK-ZVBB-NEXT: li a1, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <32 x i8> %b.head, <32 x i8> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> %b.splat) + ret <32 x i8> %x +} + +define <32 x i8> @vror_vi_v32i8(<32 x i8> %a) { +; CHECK-LABEL: vror_vi_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsll.vi v10, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> shufflevector(<32 x i8> insertelement(<32 x i8> poison, i8 1, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)) + ret <32 x i8> %x +} + +define <32 x i8> @vror_vi_rotl_v32i8(<32 x i8> %a) { +; CHECK-LABEL: vror_vi_rotl_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v32i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> shufflevector(<32 x i8> insertelement(<32 x i8> poison, i8 1, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)) + ret <32 x i8> %x +} + +declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>) +declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>) + 
+define <64 x i8> @vror_vv_v64i8(<64 x i8> %a, <64 x i8> %b) { +; CHECK-LABEL: vror_vv_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vand.vi v16, v12, 7 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 7 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 64 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> %b) + ret <64 x i8> %x +} + +define <64 x i8> @vror_vx_v64i8(<64 x i8> %a, i8 %b) { +; CHECK-LABEL: vror_vx_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vand.vi v16, v12, 7 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 7 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a1, 64 +; CHECK-ZVBB-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %b.splat = shufflevector <64 x i8> %b.head, <64 x i8> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> %b.splat) + ret <64 x i8> %x +} + +define <64 x i8> @vror_vi_v64i8(<64 x i8> %a) { +; CHECK-LABEL: vror_vi_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsll.vi v12, v8, 7 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v64i8: +; CHECK-ZVBB: # %bb.0: +; 
CHECK-ZVBB-NEXT: li a0, 64 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> shufflevector(<64 x i8> insertelement(<64 x i8> poison, i8 1, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)) + ret <64 x i8> %x +} + +define <64 x i8> @vror_vi_rotl_v64i8(<64 x i8> %a) { +; CHECK-LABEL: vror_vi_rotl_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 7 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v64i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 64 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 7 +; CHECK-ZVBB-NEXT: ret + %x = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> shufflevector(<64 x i8> insertelement(<64 x i8> poison, i8 1, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)) + ret <64 x i8> %x +} + +declare <1 x i16> @llvm.fshr.v1i16(<1 x i16>, <1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.fshl.v1i16(<1 x i16>, <1 x i16>, <1 x i16>) + +define <1 x i16> @vror_vv_v1i16(<1 x i16> %a, <1 x i16> %b) { +; CHECK-LABEL: vror_vv_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i16> @llvm.fshr.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> %b) + ret <1 x i16> %x +} + +define <1 x i16> @vror_vx_v1i16(<1 x i16> %a, i16 %b) { +; CHECK-LABEL: vror_vx_v1i16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <1 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <1 x i16> %b.head, <1 x i16> poison, <1 x i32> zeroinitializer + %x = call <1 x i16> @llvm.fshr.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> %b.splat) + ret <1 x i16> %x +} + +define <1 x i16> @vror_vi_v1i16(<1 x i16> %a) { +; CHECK-LABEL: vror_vi_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 15 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i16> @llvm.fshr.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> shufflevector(<1 x i16> insertelement(<1 x i16> poison, i16 1, i32 0), <1 x i16> poison, <1 x i32> zeroinitializer)) + ret <1 x i16> %x +} + +define <1 x i16> @vror_vi_rotl_v1i16(<1 x i16> %a) { +; CHECK-LABEL: vror_vi_rotl_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v1i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 15 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i16> @llvm.fshl.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> shufflevector(<1 x i16> 
insertelement(<1 x i16> poison, i16 1, i32 0), <1 x i16> poison, <1 x i32> zeroinitializer)) + ret <1 x i16> %x +} + +declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) +declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) + +define <2 x i16> @vror_vv_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: vror_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %x +} + +define <2 x i16> @vror_vx_v2i16(<2 x i16> %a, i16 %b) { +; CHECK-LABEL: vror_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <2 x i16> %b.head, <2 x i16> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> %b.splat) + ret <2 x i16> %x +} + +define <2 x i16> @vror_vi_v2i16(<2 x i16> %a) { +; CHECK-LABEL: vror_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 15 +; CHECK-NEXT: vsrl.vi v8, v8, 
1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> shufflevector(<2 x i16> insertelement(<2 x i16> poison, i16 1, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer)) + ret <2 x i16> %x +} + +define <2 x i16> @vror_vi_rotl_v2i16(<2 x i16> %a) { +; CHECK-LABEL: vror_vi_rotl_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v2i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 15 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> shufflevector(<2 x i16> insertelement(<2 x i16> poison, i16 1, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer)) + ret <2 x i16> %x +} + +declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) +declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) + +define <4 x i16> @vror_vv_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: vror_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %x +} + +define <4 x i16> @vror_vx_v4i16(<4 x i16> %a, 
i16 %b) { +; CHECK-LABEL: vror_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <4 x i16> %b.head, <4 x i16> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> %b.splat) + ret <4 x i16> %x +} + +define <4 x i16> @vror_vi_v4i16(<4 x i16> %a) { +; CHECK-LABEL: vror_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 15 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> shufflevector(<4 x i16> insertelement(<4 x i16> poison, i16 1, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)) + ret <4 x i16> %x +} + +define <4 x i16> @vror_vi_rotl_v4i16(<4 x i16> %a) { +; CHECK-LABEL: vror_vi_rotl_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v4i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 15 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %a, <4 x i16> 
%a, <4 x i16> shufflevector(<4 x i16> insertelement(<4 x i16> poison, i16 1, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)) + ret <4 x i16> %x +} + +declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) + +define <8 x i16> @vror_vv_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vror_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %x +} + +define <8 x i16> @vror_vx_v8i16(<8 x i16> %a, i16 %b) { +; CHECK-LABEL: vror_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vi v10, v9, 15 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vi v9, v9, 15 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <8 x i16> %b.head, <8 x i16> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %b.splat) + ret <8 x i16> %x +} + +define <8 x i16> @vror_vi_v8i16(<8 x i16> %a) { +; CHECK-LABEL: vror_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 
15 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> shufflevector(<8 x i16> insertelement(<8 x i16> poison, i16 1, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)) + ret <8 x i16> %x +} + +define <8 x i16> @vror_vi_rotl_v8i16(<8 x i16> %a) { +; CHECK-LABEL: vror_vi_rotl_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v8i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 15 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> shufflevector(<8 x i16> insertelement(<8 x i16> poison, i16 1, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)) + ret <8 x i16> %x +} + +declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) +declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) + +define <16 x i16> @vror_vv_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: vror_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vand.vi v12, v10, 15 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 15 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> %b) + ret <16 x 
i16> %x +} + +define <16 x i16> @vror_vx_v16i16(<16 x i16> %a, i16 %b) { +; CHECK-LABEL: vror_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vand.vi v12, v10, 15 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vi v10, v10, 15 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <16 x i16> %b.head, <16 x i16> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> %b.splat) + ret <16 x i16> %x +} + +define <16 x i16> @vror_vi_v16i16(<16 x i16> %a) { +; CHECK-LABEL: vror_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsll.vi v10, v8, 15 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> shufflevector(<16 x i16> insertelement(<16 x i16> poison, i16 1, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)) + ret <16 x i16> %x +} + +define <16 x i16> @vror_vi_rotl_v16i16(<16 x i16> %a) { +; CHECK-LABEL: vror_vi_rotl_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: 
vror.vi v8, v8, 15 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> shufflevector(<16 x i16> insertelement(<16 x i16> poison, i16 1, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)) + ret <16 x i16> %x +} + +declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>) +declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>) + +define <32 x i16> @vror_vv_v32i16(<32 x i16> %a, <32 x i16> %b) { +; CHECK-LABEL: vror_vv_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vand.vi v16, v12, 15 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 15 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> %b) + ret <32 x i16> %x +} + +define <32 x i16> @vror_vx_v32i16(<32 x i16> %a, i16 %b) { +; CHECK-LABEL: vror_vx_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vand.vi v16, v12, 15 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vi v12, v12, 15 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a1, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %b.splat = shufflevector <32 x i16> %b.head, <32 x i16> poison, <32 x i32> zeroinitializer + %x = call <32 x 
i16> @llvm.fshr.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> %b.splat) + ret <32 x i16> %x +} + +define <32 x i16> @vror_vi_v32i16(<32 x i16> %a) { +; CHECK-LABEL: vror_vi_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsll.vi v12, v8, 15 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> shufflevector(<32 x i16> insertelement(<32 x i16> poison, i16 1, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)) + ret <32 x i16> %x +} + +define <32 x i16> @vror_vi_rotl_v32i16(<32 x i16> %a) { +; CHECK-LABEL: vror_vi_rotl_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 15 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v32i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: li a0, 32 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 15 +; CHECK-ZVBB-NEXT: ret + %x = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> shufflevector(<32 x i16> insertelement(<32 x i16> poison, i16 1, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)) + ret <32 x i16> %x +} + +declare <1 x i32> @llvm.fshr.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.fshl.v1i32(<1 x i32>, <1 x i32>, <1 x i32>) + +define <1 x i32> @vror_vv_v1i32(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: vror_vv_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; 
CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> %b) + ret <1 x i32> %x +} + +define <1 x i32> @vror_vx_v1i32(<1 x i32> %a, i32 %b) { +; CHECK-LABEL: vror_vx_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <1 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <1 x i32> %b.head, <1 x i32> poison, <1 x i32> zeroinitializer + %x = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> %b.splat) + ret <1 x i32> %x +} + +define <1 x i32> @vror_vi_v1i32(<1 x i32> %a) { +; CHECK-LABEL: vror_vi_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 31 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> shufflevector(<1 x i32> insertelement(<1 x i32> poison, i32 1, i32 0), <1 x i32> poison, <1 x i32> zeroinitializer)) + ret <1 x i32> %x +} + +define <1 x i32> 
@vror_vi_rotl_v1i32(<1 x i32> %a) { +; CHECK-LABEL: vror_vi_rotl_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 31 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v1i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 31 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> shufflevector(<1 x i32> insertelement(<1 x i32> poison, i32 1, i32 0), <1 x i32> poison, <1 x i32> zeroinitializer)) + ret <1 x i32> %x +} + +declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define <2 x i32> @vror_vv_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vror_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %x +} + +define <2 x i32> @vror_vx_v2i32(<2 x i32> %a, i32 %b) { +; CHECK-LABEL: vror_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v2i32: +; CHECK-ZVBB: # 
%bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <2 x i32> %b.head, <2 x i32> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> %b.splat) + ret <2 x i32> %x +} + +define <2 x i32> @vror_vi_v2i32(<2 x i32> %a) { +; CHECK-LABEL: vror_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 31 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> shufflevector(<2 x i32> insertelement(<2 x i32> poison, i32 1, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)) + ret <2 x i32> %x +} + +define <2 x i32> @vror_vi_rotl_v2i32(<2 x i32> %a) { +; CHECK-LABEL: vror_vi_rotl_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 31 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v2i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 31 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> shufflevector(<2 x i32> insertelement(<2 x i32> poison, i32 1, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)) + ret <2 x i32> %x +} + +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32> @vror_vv_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vror_vv_v4i32: +; CHECK: # %bb.0: +; 
CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %x +} + +define <4 x i32> @vror_vx_v4i32(<4 x i32> %a, i32 %b) { +; CHECK-LABEL: vror_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <4 x i32> %b.head, <4 x i32> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %b.splat) + ret <4 x i32> %x +} + +define <4 x i32> @vror_vi_v4i32(<4 x i32> %a) { +; CHECK-LABEL: vror_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsll.vi v9, v8, 31 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> shufflevector(<4 x 
i32> insertelement(<4 x i32> poison, i32 1, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)) + ret <4 x i32> %x +} + +define <4 x i32> @vror_vi_rotl_v4i32(<4 x i32> %a) { +; CHECK-LABEL: vror_vi_rotl_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 31 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v4i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 31 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> shufflevector(<4 x i32> insertelement(<4 x i32> poison, i32 1, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)) + ret <4 x i32> %x +} + +declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) +declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32> @vror_vv_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: vror_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> %b) + ret <8 x i32> %x +} + +define <8 x i32> @vror_vx_v8i32(<8 x i32> %a, i32 %b) { +; CHECK-LABEL: vror_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, 
v10, a0 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <8 x i32> %b.head, <8 x i32> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> %b.splat) + ret <8 x i32> %x +} + +define <8 x i32> @vror_vi_v8i32(<8 x i32> %a) { +; CHECK-LABEL: vror_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsll.vi v10, v8, 31 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> shufflevector(<8 x i32> insertelement(<8 x i32> poison, i32 1, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)) + ret <8 x i32> %x +} + +define <8 x i32> @vror_vi_rotl_v8i32(<8 x i32> %a) { +; CHECK-LABEL: vror_vi_rotl_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v10, v8, 31 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v10 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 31 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> shufflevector(<8 x i32> insertelement(<8 x i32> poison, i32 1, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)) + ret <8 x i32> %x +} + +declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) +declare <16 x i32> @llvm.fshl.v16i32(<16 
x i32>, <16 x i32>, <16 x i32>) + +define <16 x i32> @vror_vv_v16i32(<16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: vror_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> %b) + ret <16 x i32> %x +} + +define <16 x i32> @vror_vx_v16i32(<16 x i32> %a, i32 %b) { +; CHECK-LABEL: vror_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: li a0, 31 +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vx_v16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB-NEXT: ret + %b.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %b.splat = shufflevector <16 x i32> %b.head, <16 x i32> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> %b.splat) + ret <16 x i32> %x +} + +define <16 x i32> @vror_vi_v16i32(<16 x i32> %a) { +; CHECK-LABEL: vror_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsll.vi v12, v8, 31 +; CHECK-NEXT: vsrl.vi v8, v8, 1 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v16i32: +; CHECK-ZVBB: # %bb.0: +; 
CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> shufflevector(<16 x i32> insertelement(<16 x i32> poison, i32 1, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)) + ret <16 x i32> %x +} + +define <16 x i32> @vror_vi_rotl_v16i32(<16 x i32> %a) { +; CHECK-LABEL: vror_vi_rotl_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v12, v8, 31 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v8, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v16i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 31 +; CHECK-ZVBB-NEXT: ret + %x = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> shufflevector(<16 x i32> insertelement(<16 x i32> poison, i32 1, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)) + ret <16 x i32> %x +} + +declare <1 x i64> @llvm.fshr.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.fshl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) + +define <1 x i64> @vror_vv_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-RV32-LABEL: vror_vv_v1i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v11, v10, v9 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-RV32-NEXT: vmv.s.x v10, a0 +; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vv v11, v11, v10 +; CHECK-RV32-NEXT: vsll.vv v11, v8, v11 +; CHECK-RV32-NEXT: vand.vv v9, v9, v10 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v8, v11 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vv_v1i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; 
CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i64> @llvm.fshr.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %x +} + +define <1 x i64> @vror_vx_v1i64(<1 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vror_vx_v1i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v11, v10, v9 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-RV32-NEXT: vmv.s.x v10, a0 +; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vv v11, v11, v10 +; CHECK-RV32-NEXT: vsll.vv v11, v8, v11 +; CHECK-RV32-NEXT: vand.vv v9, v9, v10 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v8, v11 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vx_v1i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v9, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v9 
+; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vror_vx_v1i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero +; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vror_vx_v1i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <1 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <1 x i64> %b.head, <1 x i64> poison, <1 x i32> zeroinitializer + %x = call <1 x i64> @llvm.fshr.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> %b.splat) + ret <1 x i64> %x +} + +define <1 x i64> @vror_vi_v1i64(<1 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_v1i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v9, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vx v10, v9, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-RV32-NEXT: vmv.s.x v9, a0 +; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vv v10, v10, v9 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV32-NEXT: vand.vi v9, v9, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_v1i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV64-NEXT: vsll.vx v9, v8, a0 +; CHECK-RV64-NEXT: vsrl.vi v8, v8, 1 +; CHECK-RV64-NEXT: vor.vv v8, v8, v9 +; CHECK-RV64-NEXT: ret +; +; 
CHECK-ZVBB-LABEL: vror_vi_v1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i64> @llvm.fshr.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> shufflevector(<1 x i64> insertelement(<1 x i64> poison, i64 1, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)) + ret <1 x i64> %x +} + +define <1 x i64> @vror_vi_rotl_v1i64(<1 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_rotl_v1i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v9, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vx v10, v9, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma +; CHECK-RV32-NEXT: vmv.s.x v9, a0 +; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vv v10, v10, v9 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV32-NEXT: vand.vi v9, v9, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_rotl_v1i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV64-NEXT: vsrl.vx v9, v8, a0 +; CHECK-RV64-NEXT: vadd.vv v8, v8, v8 +; CHECK-RV64-NEXT: vor.vv v8, v8, v9 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v1i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 63 +; CHECK-ZVBB-NEXT: ret + %x = call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> shufflevector(<1 x i64> insertelement(<1 x i64> poison, i64 1, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)) + ret <1 x i64> %x +} + +declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @vror_vv_v2i64(<2 x i64> 
%a, <2 x i64> %b) { +; CHECK-RV32-LABEL: vror_vv_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vand.vx v10, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v11, 0 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v9, v11, v9 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vv_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %x +} + +define <2 x i64> @vror_vx_v2i64(<2 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vror_vx_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v10, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v11, 0 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vv v9, v11, v9 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; 
CHECK-RV32-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vx_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v9, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v10, v9, a0 +; CHECK-RV64-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV64-NEXT: vand.vx v9, v9, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v9 +; CHECK-RV64-NEXT: vor.vv v8, v10, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vror_vx_v2i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v9, (a0), zero +; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v9 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vror_vx_v2i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <2 x i64> %b.head, <2 x i64> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %b.splat) + ret <2 x i64> %x +} + +define <2 x i64> @vror_vi_v2i64(<2 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v9, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vx v9, v9, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; 
CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vsll.vx v9, v8, a0 +; CHECK-RV64-NEXT: vsrl.vi v8, v8, 1 +; CHECK-RV64-NEXT: vor.vv v8, v8, v9 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> shufflevector(<2 x i64> insertelement(<2 x i64> poison, i64 1, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)) + ret <2 x i64> %x +} + +define <2 x i64> @vror_vi_rotl_v2i64(<2 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_rotl_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v9, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsub.vx v9, v9, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_rotl_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vsrl.vx v9, v8, a0 +; CHECK-RV64-NEXT: vadd.vv v8, v8, v8 +; CHECK-RV64-NEXT: vor.vv v8, v8, v9 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v2i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 63 +; CHECK-ZVBB-NEXT: ret + %x = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> shufflevector(<2 x i64> insertelement(<2 x i64> 
poison, i64 1, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)) + ret <2 x i64> %x +} + +declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @vror_vv_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-RV32-LABEL: vror_vv_v4i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vand.vx v12, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v14, 0 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vsub.vv v10, v14, v10 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vv_v4i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vand.vx v12, v10, a0 +; CHECK-RV64-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 +; CHECK-RV64-NEXT: vand.vx v10, v10, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV64-NEXT: vor.vv v8, v12, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v10 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> %b) + ret <4 x i64> %x +} + +define <4 x i64> @vror_vx_v4i64(<4 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vror_vx_v4i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: 
vand.vx v12, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v14, 0 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vsub.vv v10, v14, v10 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vx_v4i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v10, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v12, v10, a0 +; CHECK-RV64-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 +; CHECK-RV64-NEXT: vand.vx v10, v10, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV64-NEXT: vor.vv v8, v12, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vror_vx_v4i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v10, (a0), zero +; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v10 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vror_vx_v4i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <4 x i64> %b.head, <4 x i64> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> %b.splat) + ret <4 x i64> %x +} + +define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_v4i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; 
CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_v4i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vsll.vx v10, v8, a0 +; CHECK-RV64-NEXT: vsrl.vi v8, v8, 1 +; CHECK-RV64-NEXT: vor.vv v8, v8, v10 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> shufflevector(<4 x i64> insertelement(<4 x i64> poison, i64 1, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)) + ret <4 x i64> %x +} + +define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_rotl_v4i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_rotl_v4i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV64-NEXT: vsrl.vx v10, v8, a0 +; CHECK-RV64-NEXT: vadd.vv v8, v8, v8 +; CHECK-RV64-NEXT: vor.vv v8, v8, v10 +; 
CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 63 +; CHECK-ZVBB-NEXT: ret + %x = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> shufflevector(<4 x i64> insertelement(<4 x i64> poison, i64 1, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)) + ret <4 x i64> %x +} + +declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) +declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) + +define <8 x i64> @vror_vv_v8i64(<8 x i64> %a, <8 x i64> %b) { +; CHECK-RV32-LABEL: vror_vv_v8i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vand.vx v16, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16 +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v20, 0 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vsub.vv v12, v20, v12 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vv_v8i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV64-NEXT: vand.vx v16, v12, a0 +; CHECK-RV64-NEXT: vsrl.vv v16, v8, v16 +; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 +; CHECK-RV64-NEXT: vand.vx v12, v12, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV64-NEXT: vor.vv v8, v16, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vv_v8i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vv v8, v8, v12 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> %b) + ret <8 x i64> %x +} + +define <8 x i64> @vror_vx_v8i64(<8 x i64> %a, i64 %b) { +; CHECK-RV32-LABEL: vror_vx_v8i64: +; CHECK-RV32: # %bb.0: 
+; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: sw a0, 12(sp) +; CHECK-RV32-NEXT: sw a0, 8(sp) +; CHECK-RV32-NEXT: addi a0, sp, 8 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v16, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16 +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v20, 0 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vsub.vv v12, v20, v12 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: addi sp, sp, 16 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vx_v8i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV64-NEXT: vmv.v.x v12, a0 +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vand.vx v16, v12, a0 +; CHECK-RV64-NEXT: vsrl.vv v16, v8, v16 +; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 +; CHECK-RV64-NEXT: vand.vx v12, v12, a0 +; CHECK-RV64-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV64-NEXT: vor.vv v8, v16, v8 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB32-LABEL: vror_vx_v8i64: +; CHECK-ZVBB32: # %bb.0: +; CHECK-ZVBB32-NEXT: addi sp, sp, -16 +; CHECK-ZVBB32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB32-NEXT: sw a0, 12(sp) +; CHECK-ZVBB32-NEXT: sw a0, 8(sp) +; CHECK-ZVBB32-NEXT: addi a0, sp, 8 +; CHECK-ZVBB32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB32-NEXT: vlse64.v v12, (a0), zero +; CHECK-ZVBB32-NEXT: vror.vv v8, v8, v12 +; CHECK-ZVBB32-NEXT: addi sp, sp, 16 +; CHECK-ZVBB32-NEXT: ret +; +; CHECK-ZVBB64-LABEL: vror_vx_v8i64: +; CHECK-ZVBB64: # %bb.0: +; CHECK-ZVBB64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB64-NEXT: vror.vx v8, v8, a0 +; CHECK-ZVBB64-NEXT: ret + %b.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %b.splat = shufflevector <8 x i64> %b.head, <8 x i64> poison, <8 x i32> 
zeroinitializer + %x = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> %b.splat) + ret <8 x i64> %x +} + +define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_v8i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v12, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-RV32-NEXT: vand.vi v16, v16, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v8, v12 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_v8i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV64-NEXT: vsll.vx v12, v8, a0 +; CHECK-RV64-NEXT: vsrl.vi v8, v8, 1 +; CHECK-RV64-NEXT: vor.vv v8, v8, v12 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_v8i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 1 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> shufflevector(<8 x i64> insertelement(<8 x i64> poison, i64 1, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)) + ret <8 x i64> %x +} + +define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) { +; CHECK-RV32-LABEL: vror_vi_rotl_v8i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v12, 0 +; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-RV32-NEXT: vand.vi v16, v16, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 +; 
CHECK-RV32-NEXT: vor.vv v8, v8, v12 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: vror_vi_rotl_v8i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a0, 63 +; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV64-NEXT: vsrl.vx v12, v8, a0 +; CHECK-RV64-NEXT: vadd.vv v8, v8, v8 +; CHECK-RV64-NEXT: vor.vv v8, v8, v12 +; CHECK-RV64-NEXT: ret +; +; CHECK-ZVBB-LABEL: vror_vi_rotl_v8i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB-NEXT: vror.vi v8, v8, 63 +; CHECK-ZVBB-NEXT: ret + %x = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> shufflevector(<8 x i64> insertelement(<8 x i64> poison, i64 1, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)) + ret <8 x i64> %x +} +