diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -57,6 +57,18 @@
   bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
   bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
 
+  bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
+  template <unsigned Width>
+  bool selectRVVSimm5(SDValue N, SDValue &Imm) {
+    return selectRVVSimm5(N, Width, Imm);
+  }
+
+  bool selectRVVUimm5(SDValue N, unsigned Width, SDValue &Imm);
+  template <unsigned Width>
+  bool selectRVVUimm5(SDValue N, SDValue &Imm) {
+    return selectRVVUimm5(N, Width, Imm);
+  }
+
   void selectVLSEG(SDNode *Node, unsigned IntNo, bool IsStrided);
   void selectVLSEGMask(SDNode *Node, unsigned IntNo, bool IsStrided);
   void selectVLSEGFF(SDNode *Node);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1008,6 +1008,34 @@
   return true;
 }
 
+bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm) {
+  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
+
+    if (!isInt<5>(ImmVal))
+      return false;
+
+    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
+    return true;
+  }
+
+  return false;
+}
+
+bool RISCVDAGToDAGISel::selectRVVUimm5(SDValue N, unsigned Width, SDValue &Imm) {
+  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+    int64_t ImmVal = C->getSExtValue();
+
+    if (!isUInt<5>(ImmVal))
+      return false;
+
+    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
+    return true;
+  }
+
+  return false;
+}
+
 // Merge an ADDI into the offset of a load/store instruction where possible.
 // (load (addi base, off1), off2) -> (load base, off1+off2)
 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4056,15 +4056,8 @@
                        (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
              $rs1, GPR:$vl, vti.SEW)>;
-}
-
-foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
-            (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
-             $rs2, GPR:$vl, vti.SEW)>;
-  def : Pat<(vti.Vector (riscv_vmv_v_x_vl simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
-            (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
-             simm5:$imm5, GPR:$vl, vti.SEW)>;
+
+  // vmv.v.x and vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td.
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -93,7 +93,21 @@
 def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
 def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
 
-def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
+def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
+
+// Ignore the vl operand.
+def SplatIntOp : PatFrag<(ops node:$op),
+                         (riscv_vmv_v_x_vl node:$op, srcvalue)>;
+
+def sew8simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<8>", []>;
+def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
+def sew32simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<32>", []>;
+def sew64simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<64>", []>;
+
+def sew8uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<8>", []>;
+def sew16uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<16>", []>;
+def sew32uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<32>", []>;
+def sew64uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<64>", []>;
 
 class VPatBinaryVL_VV<SDNode vop,
                       string instruction_name,
@@ -110,21 +124,69 @@
                       op_reg_class:$rs1,
                       op_reg_class:$rs2,
                       GPR:$vl, sew)>;
 
+class VPatBinaryVL_VX<SDNode vop,
+                      string instruction_name,
+                      string suffix,
+                      ValueType result_type,
+                      ValueType vop_type,
+                      ValueType mask_type,
+                      int sew,
+                      LMULInfo vlmul,
+                      VReg vop_reg_class> :
+    Pat<(result_type (vop
+                      (vop_type vop_reg_class:$rs1),
+                      (vop_type (SplatIntOp GPR:$rs2)),
+                      (mask_type true_mask),
+                      (XLenVT (VLOp GPR:$vl)))),
+        (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX)
+                      vop_reg_class:$rs1,
+                      GPR:$rs2,
+                      GPR:$vl, sew)>;
+
+class VPatBinaryVL_VI<SDNode vop,
+                      string instruction_name,
+                      string suffix,
+                      ValueType result_type,
+                      ValueType vop_type,
+                      ValueType mask_type,
+                      int sew,
+                      LMULInfo vlmul,
+                      VReg vop_reg_class,
+                      ComplexPattern ImmPat> :
+    Pat<(result_type (vop
+                      (vop_type vop_reg_class:$rs1),
+                      (vop_type (SplatIntOp (ImmPat XLenVT:$rs2))),
+                      (mask_type true_mask),
+                      (XLenVT (VLOp GPR:$vl)))),
+        (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX)
+                      vop_reg_class:$rs1,
+                      XLenVT:$rs2,
+                      GPR:$vl, sew)>;
+
 multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
   foreach vti = AllIntegerVectors in {
     def : VPatBinaryVL_VV<vop, instruction_name,
                           vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                           vti.LMul, vti.RegClass>;
-    // FIXME: Support splats.
+    def : VPatBinaryVL_VX<vop, instruction_name, "VX",
+                          vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+                          vti.LMul, vti.RegClass>;
   }
 }
 
-multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name> {
+multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
+                                 string ImmType = "simm5"> {
   foreach vti = AllIntegerVectors in {
     def : VPatBinaryVL_VV<vop, instruction_name,
                           vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                           vti.LMul, vti.RegClass>;
-    // FIXME: Support splats.
+    def : VPatBinaryVL_VX<vop, instruction_name, "VX",
+                          vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+                          vti.LMul, vti.RegClass>;
+    def : VPatBinaryVL_VI<vop, instruction_name, "VI",
+                          vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+                          vti.LMul, vti.RegClass,
+                          !cast<ComplexPattern>("sew"#vti.SEW#ImmType)>;
   }
 }
 
@@ -163,6 +228,21 @@
 // 12.1. Vector Single-Width Integer Add and Subtract
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">;
 defm "" : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
+// Handle VRSUB specially since it's the only integer binary op with reversed
+// pattern operands
+foreach vti = AllIntegerVectors in {
+  def : Pat<(riscv_sub_vl (vti.Vector (SplatIntOp XLenVT:$rs2)),
+                          (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
+                          (XLenVT (VLOp GPR:$vl))),
+            (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
+             vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+  defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
+  def : Pat<(riscv_sub_vl (vti.Vector (SplatIntOp (ImmPat XLenVT:$rs2))),
+                          (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
+                          (XLenVT (VLOp GPR:$vl))),
+            (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
+             vti.RegClass:$rs1, XLenVT:$rs2, GPR:$vl, vti.SEW)>;
+}
 
 // 12.5. Vector Bitwise Logical Instructions
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_and_vl, "PseudoVAND">;
@@ -170,9 +250,9 @@
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_xor_vl, "PseudoVXOR">;
 
 // 12.6. Vector Single-Width Bit Shift Instructions
-defm "" : VPatBinaryVL_VV_VX_VI<riscv_shl_vl, "PseudoVSLL">;
-defm "" : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL">;
-defm "" : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA">;
+defm "" : VPatBinaryVL_VV_VX_VI<riscv_shl_vl, "PseudoVSLL", "uimm5">;
+defm "" : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL", "uimm5">;
+defm "" : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", "uimm5">;
 
 // 12.10. Vector Single-Width Integer Multiply Instructions
 defm "" : VPatBinaryVL_VV_VX<riscv_mul_vl, "PseudoVMUL">;
@@ -183,6 +263,18 @@
 defm "" : VPatBinaryVL_VV_VX<riscv_urem_vl, "PseudoVREMU">;
 defm "" : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;
 
+// 12.17.
Vector Integer Move Instructions +foreach vti = AllIntegerVectors in { + def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMV_V_X_"#vti.LMul.MX) + $rs2, GPR:$vl, vti.SEW)>; + defvar ImmPat = !cast("sew"#vti.SEW#"simm5"); + def : Pat<(vti.Vector (riscv_vmv_v_x_vl (ImmPat XLenVT:$imm5), + (XLenVT (VLOp GPR:$vl)))), + (!cast("PseudoVMV_V_I_"#vti.LMul.MX) + XLenVT:$imm5, GPR:$vl, vti.SEW)>; +} + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -3435,3 +3435,1638 @@ store <4 x i64> %c, <4 x i64>* %x ret void } + +define void @add_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: add_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, -1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 -1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = add <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @add_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: add_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, -1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 -1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = add <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @add_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: add_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi 
a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, -1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 -1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = add <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @add_iv_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: add_iv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = add <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @add_iv_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: add_iv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, 1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = add <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @add_iv_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: add_iv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vadd.vi v25, v25, 1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = add <4 x i32> %c, %a + store <4 x i32> %d, <4 
x i32>* %x + ret void +} + +define void @add_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: add_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vadd.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = add <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @add_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: add_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vadd.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = add <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @add_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: add_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vadd.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = add <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @add_xv_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: add_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vadd.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = 
insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = add <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @add_xv_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: add_xv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vadd.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = add <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @add_xv_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: add_xv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vadd.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = add <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @sub_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: sub_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 -1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = sub <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @sub_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: sub_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; 
CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 -1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = sub <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @sub_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: sub_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: addi a1, zero, -1 +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 -1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = sub <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @sub_iv_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: sub_iv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vrsub.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = sub <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @sub_iv_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: sub_iv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vrsub.vi v25, v25, 1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> 
zeroinitializer + %d = sub <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @sub_iv_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: sub_iv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vrsub.vi v25, v25, 1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = sub <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @sub_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: sub_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = sub <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @sub_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: sub_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = sub <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @sub_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: sub_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsub.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) 
+; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = sub <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @sub_xv_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: sub_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vrsub.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = sub <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @sub_xv_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: sub_xv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vrsub.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = sub <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @sub_xv_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: sub_xv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vrsub.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = sub <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @mul_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: mul_vx_v16i8: +; CHECK: # 
%bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmul.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = mul <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @mul_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: mul_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmul.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = mul <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @mul_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: mul_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmul.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = mul <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @mul_xv_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: mul_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vmul.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> 
zeroinitializer + %d = mul <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @mul_xv_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: mul_xv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vmul.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = mul <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @mul_xv_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: mul_xv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vmul.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = mul <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @and_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: and_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, -2 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 -2, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = and <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @and_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: and_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, -2 +; CHECK-NEXT: vse16.v v25, (a0) +; 
CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 -2, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = and <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @and_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: and_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, -2 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 -2, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = and <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @and_iv_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: and_iv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = and <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @and_iv_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: and_iv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, 1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = and <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @and_iv_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: and_iv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; 
CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vand.vi v25, v25, 1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = and <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @and_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: and_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vand.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = and <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @and_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: and_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vand.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = and <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @and_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: and_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vand.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = and <4 x i32> %a, %c + store <4 x 
i32> %d, <4 x i32>* %x + ret void +} + +define void @and_xv_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: and_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vand.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = and <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @and_xv_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: and_xv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vand.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = and <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @and_xv_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: and_xv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vand.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = and <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @or_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: or_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, -2 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = 
insertelement <16 x i8> undef, i8 -2, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = or <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @or_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: or_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, -2 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 -2, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = or <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @or_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: or_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, -2 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 -2, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = or <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @or_iv_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: or_iv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = or <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @or_iv_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: or_iv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; 
CHECK-NEXT: vor.vi v25, v25, 1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = or <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @or_iv_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: or_iv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vor.vi v25, v25, 1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = or <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @or_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: or_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vor.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = or <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @or_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: or_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vor.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = or <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @or_vx_v4i32(<4 x i32>* %x, i32 %y) { +; 
CHECK-LABEL: or_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vor.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = or <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @or_xv_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: or_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vor.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = or <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @or_xv_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: or_xv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vor.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = or <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @or_xv_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: or_xv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vor.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> 
undef, <4 x i32> zeroinitializer + %d = or <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @xor_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: xor_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, -1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 -1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = xor <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @xor_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: xor_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, -1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 -1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = xor <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @xor_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: xor_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, -1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 -1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = xor <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @xor_iv_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: xor_iv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, 1 +; CHECK-NEXT: vse8.v v25, (a0) +; 
CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 1, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = xor <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @xor_iv_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: xor_iv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, 1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 1, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = xor <8 x i16> %c, %a + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @xor_iv_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: xor_iv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vxor.vi v25, v25, 1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 1, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = xor <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @xor_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: xor_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vxor.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = xor <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @xor_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: xor_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, 
zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vxor.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = xor <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @xor_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: xor_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vxor.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = xor <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @xor_xv_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: xor_xv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vxor.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = xor <16 x i8> %c, %a + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @xor_xv_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: xor_xv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vxor.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = xor <8 x i16> %c, %a 
+ store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @xor_xv_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: xor_xv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vxor.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = xor <4 x i32> %c, %a + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @lshr_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: lshr_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsrl.vi v25, v25, 7 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 7, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = lshr <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @lshr_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: lshr_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsrl.vi v25, v25, 15 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 15, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = lshr <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @lshr_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: lshr_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsrl.vi v25, v25, 31 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* 
%x + %b = insertelement <4 x i32> undef, i32 31, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = lshr <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @lshr_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: lshr_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = lshr <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @lshr_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: lshr_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = lshr <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @lshr_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: lshr_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = lshr <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @ashr_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: ashr_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: 
vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsra.vi v25, v25, 7 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 7, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = ashr <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @ashr_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: ashr_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsra.vi v25, v25, 15 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 15, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = ashr <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @ashr_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: ashr_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsra.vi v25, v25, 31 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 31, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = ashr <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @ashr_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: ashr_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsra.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = ashr <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* 
%x + ret void +} + +define void @ashr_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: ashr_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsra.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = ashr <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @ashr_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: ashr_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsra.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = ashr <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @shl_vi_v16i8(<16 x i8>* %x) { +; CHECK-LABEL: shl_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 16 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsll.vi v25, v25, 7 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 7, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = shl <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @shl_vi_v8i16(<8 x i16>* %x) { +; CHECK-LABEL: shl_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsll.vi v25, v25, 15 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 
x i16> undef, i16 15, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = shl <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @shl_vi_v4i32(<4 x i32>* %x) { +; CHECK-LABEL: shl_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsll.vi v25, v25, 31 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 31, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = shl <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @shl_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: shl_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = shl <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @shl_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: shl_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = shl <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @shl_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: shl_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; 
CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vsll.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = shl <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @sdiv_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: sdiv_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vdiv.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = sdiv <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @sdiv_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: sdiv_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vdiv.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = sdiv <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @sdiv_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: sdiv_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vdiv.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = sdiv <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret 
void +} + +define void @srem_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: srem_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vrem.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = srem <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @srem_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: srem_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vrem.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = srem <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @srem_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: srem_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vrem.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = srem <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @udiv_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: udiv_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vdivu.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = 
insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = udiv <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @udiv_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: udiv_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vdivu.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = udiv <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @udiv_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: udiv_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vdivu.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = udiv <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +} + +define void @urem_vx_v16i8(<16 x i8>* %x, i8 %y) { +; CHECK-LABEL: urem_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vremu.vx v25, v25, a1 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = insertelement <16 x i8> undef, i8 %y, i32 0 + %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer + %d = urem <16 x i8> %a, %c + store <16 x i8> %d, <16 x i8>* %x + ret void +} + +define void @urem_vx_v8i16(<8 x i16>* %x, i16 %y) { +; CHECK-LABEL: urem_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: 
vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vremu.vx v25, v25, a1 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = insertelement <8 x i16> undef, i16 %y, i32 0 + %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer + %d = urem <8 x i16> %a, %c + store <8 x i16> %d, <8 x i16>* %x + ret void +} + +define void @urem_vx_v4i32(<4 x i32>* %x, i32 %y) { +; CHECK-LABEL: urem_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vremu.vx v25, v25, a1 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = insertelement <4 x i32> undef, i32 %y, i32 0 + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer + %d = urem <4 x i32> %a, %c + store <4 x i32> %d, <4 x i32>* %x + ret void +}