Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -1625,6 +1625,11 @@ return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; } + /// If target can replicate a byte in a vector register and store (parts + /// of) it, do that for a memset instead of a multiplication of the value + /// with 0x010101... + virtual bool prefersVectorSplatForMemset() const { return false; } + /// Get maximum # of store operations permitted for llvm.memcpy /// /// This function returns the maximum number of store operations permitted @@ -1733,7 +1738,8 @@ /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. /// It returns EVT::Other if the type should be determined using generic - /// target-independent logic. + /// target-independent logic. MVT::Untyped can be returned for a case that + /// is handled in some other way by the target. virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList & /*FuncAttributes*/) const { Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6370,6 +6370,17 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(!Value.isUndef()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + + if (TLI.prefersVectorSplatForMemset()) { + // XXX Maybe simply check for vector VT instead? 
+    EVT ByteReplVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8,
+                                      VT.getSizeInBits() / 8);
+    Value = DAG.getSplatBuildVector(ByteReplVT, dl, Value);
+    return DAG.getBitcast(VT, Value);
+  }
 
   unsigned NumBits = VT.getScalarSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
@@ -6384,7 +6395,6 @@
                        VT);
   }
 
-  assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
   EVT IntVT = VT.getScalarType();
   if (!IntVT.isInteger())
     IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
@@ -6861,6 +6871,7 @@
     DstAlignCanChange = true;
   bool IsZeroVal =
       isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+
   if (!TLI.findOptimalMemOpLowering(
           MemOps, TLI.getMaxStoresPerMemset(OptSize),
           MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
@@ -6910,6 +6921,14 @@
       if (!LargestVT.isVector() && !VT.isVector() &&
           TLI.isTruncateFree(LargestVT, VT))
         Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+      else if (TLI.prefersVectorSplatForMemset()) {
+        // XXX Maybe simply check for vector VT instead?
+        EVT VecVT = EVT::getVectorVT(*DAG.getContext(), VT,
+                                     LargestVT.getSizeInBits() / VT.getSizeInBits());
+        Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT,
+                            DAG.getBitcast(VecVT, Value),
+                            DAG.getVectorIdxConstant(0, dl));
+      }
       else
         Value = getMemsetValue(Src, VT, DAG, dl);
     }
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -199,6 +199,8 @@
     return false;
 
   EVT VT = getOptimalMemOpType(Op, FuncAttributes);
+  if (VT == MVT::Untyped)
+    return false;
 
   if (VT == MVT::Other) {
     // Use the largest integer type whose alignment constraints are satisfied.
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -457,6 +457,11 @@
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                       MachineMemOperand::Flags Flags,
                                       bool *Fast) const override;
+
+  bool prefersVectorSplatForMemset() const override;
+  EVT getOptimalMemOpType(const MemOp &Op,
+                          const AttributeList &FuncAttributes) const override;
+
   bool isTruncateFree(Type *, Type *) const override;
   bool isTruncateFree(EVT, EVT) const override;
@@ -467,6 +472,8 @@
     return VT == MVT::i32 || VT == MVT::i64;
   }
+
+  bool shouldConsiderGEPOffsetSplit() const override;
+
   const char *getTargetNodeName(unsigned Opcode) const override;
   std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -77,6 +77,10 @@
   return Op;
 }
 
+// EXPERIMENTAL
+static cl::opt<unsigned> MaxStoresMemset("max-stores-memset", cl::init(0));
+static cl::opt<unsigned> MaxStoresMemcpy("max-stores-memcpy", cl::init(0));
+
 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                              const SystemZSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -666,7 +670,7 @@
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
   // We want to use MVC in preference to even a single load/store pair.
-  MaxStoresPerMemcpy = 0;
+  MaxStoresPerMemcpy = Subtarget.hasVector() ? MaxStoresMemcpy : 0;
   MaxStoresPerMemcpyOptSize = 0;
 
   // The main memset sequence is a byte store followed by an MVC.
@@ -674,7 +678,7 @@
   // generated by target-independent code don't when the byte value is
   // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
   // than "STC;MVC".  Handle the choice in target-specific code instead.
-  MaxStoresPerMemset = 0;
+  MaxStoresPerMemset = Subtarget.hasVector() ? MaxStoresMemset : 0;
   MaxStoresPerMemsetOptSize = 0;
 
   // Default to having -disable-strictnode-mutation on
@@ -962,6 +966,14 @@
   return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
 }
 
+// EXPERIMENTAL
+static cl::opt<bool> LegalAMVecTy("legalam-vec", cl::init(false), cl::Hidden);
+static cl::opt<bool> GEPOffsSplit("gepoffssplit", cl::init(false), cl::Hidden);
+
+bool SystemZTargetLowering::shouldConsiderGEPOffsetSplit() const {
+  return GEPOffsSplit;
+}
+
 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
        const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
   // Punt on globals for now, although they can be used in limited
@@ -973,7 +985,8 @@
   if (!isInt<20>(AM.BaseOffs))
     return false;
 
-  AddressingMode SupportedAM(true, true);
+  bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy() && LegalAMVecTy;
+  AddressingMode SupportedAM(!RequireD12, true);
   if (I != nullptr)
     SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
 
@@ -988,6 +1001,32 @@
   return AM.Scale == 0 || AM.Scale == 1;
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> MemsetSplat("memset-splat", cl::init(true));
+static cl::opt<bool> MVI_TYPEFIX("mvi-typefix", cl::init(false));
+static cl::opt<bool> BYTEREPL_FIX("byterepl-fix", cl::init(false));
+
+bool SystemZTargetLowering::prefersVectorSplatForMemset() const {
+  return Subtarget.hasVector() && MemsetSplat;
+}
+
+EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
+                              const AttributeList &FuncAttributes) const {
+  const int MVCFastLen = 16;
+
+  // Return MVT::Untyped in these cases to indicate that a load/store
+  // sequence is not desired.
+  if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
+    return MVT::Untyped;  // Small memcpy: Use MVC
+  if (Op.isMemset() && Op.size() <= MVCFastLen)
+    return MVT::Untyped;  // Small memset: Use MVC
+  if (Op.isZeroMemset())
+    return MVT::Untyped;  // Memset zero: Use XC
+
+  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
+}
+
 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
   if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
     return false;
@@ -6308,6 +6347,111 @@
     }
   }
 
+  // EXPERIMENTAL: Make sure store of small constant uses MVI. This just
+  // changes the type of the constant to i32, which is needed for
+  // SelectionDAGs expansion of memset using extracts from a replicated value.
+  // TODO: better to fix isel pattern for MVI...
+  if (MVI_TYPEFIX && Op1.getValueType() == MVT::i64)
+    if (auto *C = dyn_cast<ConstantSDNode>(Op1)) {
+      int64_t CVal = C->getSExtValue();
+      if ((MemVT == MVT::i8 && (isInt<8>(CVal) || isUInt<8>(CVal))) ||
+          (MemVT == MVT::i16 && (isInt<16>(CVal) || isUInt<16>(CVal))) ||
+          (MemVT == MVT::i32 && (isInt<32>(CVal) || isUInt<32>(CVal))))
+        return DAG.getTruncStore(SN->getChain(), SDLoc(SN),
+                                 DAG.getConstant(CVal, SDLoc(SN), MVT::i32),
+                                 SN->getBasePtr(), MemVT, SN->getMemOperand());
+    }
+
+  // EXPERIMENTAL: replicate a byte with VREP instead of scalar mul. This is
+  // the code we get from SelectionDAGs expansion of memset if not fixing it
+  // to emit a vector directly. The immediate case could be generalized for
+  // any bigger constant using the SystemZVectorConstantInfo...
+  bool OnlyUsedByStores = true;
+  for (auto *U : Op1->uses())
+    if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
+      if (ST->isTruncatingStore() ||
+          (!isTypeLegal(ST->getMemoryVT()) && MemVT != MVT::i16))
+        OnlyUsedByStores = false;
+    }
+    else
+      OnlyUsedByStores = false;
+
+  if (BYTEREPL_FIX && OnlyUsedByStores) {
+    unsigned NumMemBytes = MemVT.getStoreSizeInBits() / 8;
+    uint32_t SplatVal;
+    auto IsReplicatedByteImm = [&SplatVal, &NumMemBytes](uint64_t CVal) {
+      SplatVal = CVal & 0xff;
+      for (unsigned B = 1 ; B < NumMemBytes; B++)
+        if (((CVal >> (B * 8)) & 0xff) != SplatVal)
+          return false;
+      return true;
+    };
+
+    SDValue Byte = SDValue();
+    auto IsReplicatedByteReg = [&](SDValue MulOp) {
+      EVT MulVT = MulOp.getValueType();
+      if (MulOp->getOpcode() == ISD::MUL &&
+          (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
+        if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1)))
+          if (!(IsReplicatedByteImm(C->getZExtValue()) && SplatVal == 1))
+            return false;
+        SDValue ZExt = MulOp->getOperand(0);
+        if (ZExt->getOpcode() == ISD::ZERO_EXTEND &&
+            ZExt->getOperand(0).getValueType() == MVT::i8) {
+          Byte = ZExt->getOperand(0);
+          return true;
+        }
+      }
+      return false;
+    };
+
+    if (isa<BuildVectorSDNode>(Op1) && Op1.getValueType() == MVT::v2i64 &&
+        IsReplicatedByteReg(Op1->getOperand(0))) {
+      SDValue SplatV = DAG.getSplatVector(MVT::v16i8, SDLoc(SN), Byte);
+      return DAG.getStore(SN->getChain(), SDLoc(SN), SplatV,
+                          SN->getBasePtr(), SN->getMemOperand());
+    }
+
+    if (IsReplicatedByteReg(Op1)) {
+      SDValue SplatV = DAG.getSplatVector(MVT::v16i8, SDLoc(SN), Byte);
+      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MemVT,
+                                   SystemZ::VectorBytes / NumMemBytes);
+      SDValue ValueToStore =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(SN), MemVT,
+                    DAG.getBitcast(VecVT, SplatV),
+                    DAG.getVectorIdxConstant(0, SDLoc(SN)));
+      return DAG.getStore(SN->getChain(), SDLoc(SN), ValueToStore,
+                          SN->getBasePtr(), SN->getMemOperand());
+    }
+
+    // Store replicated immediate with VREPI+VST.
+    APInt ConstValue;
+    if (isa<BuildVectorSDNode>(Op1) && Op1.getValueType() == MVT::v2i64 &&
+        ISD::isConstantSplatVector(Op1.getNode(), ConstValue) &&
+        IsReplicatedByteImm(ConstValue.getZExtValue())) {
+      SDValue SplatV = DAG.getSplatVector(MVT::v16i8, SDLoc(SN),
+                           DAG.getConstant(SplatVal, SDLoc(SN), MVT::i32));
+      return DAG.getStore(SN->getChain(), SDLoc(SN), SplatV,
+                          SN->getBasePtr(), SN->getMemOperand());
+    }
+
+    if (auto *C = dyn_cast<ConstantSDNode>(Op1))
+      if (IsReplicatedByteImm(C->getZExtValue())) {
+        if (SplatVal == 0 || SplatVal == 0xff || NumMemBytes <= 2)
+          return SDValue();
+        SDValue SplatV = DAG.getSplatVector(MVT::v16i8, SDLoc(SN),
+                             DAG.getConstant(SplatVal, SDLoc(SN), MVT::i32));
+        EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MemVT,
+                                     SystemZ::VectorBytes / NumMemBytes);
+        SDValue ValueToStore =
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(SN), MemVT,
+                      DAG.getBitcast(VecVT, SplatV),
+                      DAG.getVectorIdxConstant(0, SDLoc(SN)));
+        return DAG.getStore(SN->getChain(), SDLoc(SN), ValueToStore,
+                            SN->getBasePtr(), SN->getMemOperand());
+      }
+  }
+
   return SDValue();
 }
 
Index: llvm/test/CodeGen/SystemZ/memcpy-03.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/memcpy-03.ll
@@ -0,0 +1,440 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test memcpys of small constant lengths, that should not be done with MVC.
+; +; RUN: llc -mcpu=z15 -max-stores-memcpy=3 < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i1) nounwind + +define void @fun16(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun16: +; CHECK: # %bb.0: +; CHECK-NEXT: mvc 0(16,%r3), 0(%r2) +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 16, i1 false) + ret void +} + +define void @fun17(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun17: +; CHECK: # %bb.0: +; CHECK-NEXT: lb %r0, 16(%r2) +; CHECK-NEXT: stc %r0, 16(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 17, i1 false) + ret void +} + +define void @fun18(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun18: +; CHECK: # %bb.0: +; CHECK-NEXT: lh %r0, 16(%r2) +; CHECK-NEXT: sth %r0, 16(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 18, i1 false) + ret void +} + +define void @fun19(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun19: +; CHECK: # %bb.0: +; CHECK-NEXT: l %r0, 15(%r2) +; CHECK-NEXT: st %r0, 15(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 19, i1 false) + ret void +} + +define void @fun20(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun20: +; CHECK: # %bb.0: +; CHECK-NEXT: l %r0, 16(%r2) +; CHECK-NEXT: st %r0, 16(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 20, i1 false) + ret void +} + +define void @fun21(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun21: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 13(%r2) +; 
CHECK-NEXT: stg %r0, 13(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 21, i1 false) + ret void +} + +define void @fun22(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun22: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 14(%r2) +; CHECK-NEXT: stg %r0, 14(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 22, i1 false) + ret void +} + +define void @fun23(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun23: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 15(%r2) +; CHECK-NEXT: stg %r0, 15(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 23, i1 false) + ret void +} + +define void @fun24(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun24: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 16(%r2) +; CHECK-NEXT: stg %r0, 16(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 24, i1 false) + ret void +} + +define void @fun25(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun25: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 9(%r2) +; CHECK-NEXT: vst %v0, 9(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 25, i1 false) + ret void +} + +define void @fun26(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun26: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 10(%r2) +; CHECK-NEXT: vst %v0, 10(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 26, i1 false) + 
ret void +} + +define void @fun27(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun27: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 11(%r2) +; CHECK-NEXT: vst %v0, 11(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 27, i1 false) + ret void +} + +define void @fun28(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun28: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 12(%r2) +; CHECK-NEXT: vst %v0, 12(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 28, i1 false) + ret void +} + +define void @fun29(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun29: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 13(%r2) +; CHECK-NEXT: vst %v0, 13(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 29, i1 false) + ret void +} + +define void @fun30(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun30: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 14(%r2) +; CHECK-NEXT: vst %v0, 14(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 30, i1 false) + ret void +} + +define void @fun31(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun31: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 15(%r2) +; CHECK-NEXT: vst %v0, 15(%r3) +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 31, i1 false) + ret void +} + +define void @fun32(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; 
CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 32, i1 false) + ret void +} + +define void @fun33(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun33: +; CHECK: # %bb.0: +; CHECK-NEXT: lb %r0, 32(%r2) +; CHECK-NEXT: stc %r0, 32(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 33, i1 false) + ret void +} + +define void @fun34(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun34: +; CHECK: # %bb.0: +; CHECK-NEXT: lh %r0, 32(%r2) +; CHECK-NEXT: sth %r0, 32(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 34, i1 false) + ret void +} + +define void @fun35(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun35: +; CHECK: # %bb.0: +; CHECK-NEXT: l %r0, 31(%r2) +; CHECK-NEXT: st %r0, 31(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 35, i1 false) + ret void +} + +define void @fun36(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun36: +; CHECK: # %bb.0: +; CHECK-NEXT: l %r0, 32(%r2) +; CHECK-NEXT: st %r0, 32(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 36, i1 false) + ret void +} + +define void @fun37(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun37: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 29(%r2) +; 
CHECK-NEXT: stg %r0, 29(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 37, i1 false) + ret void +} + +define void @fun38(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun38: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 30(%r2) +; CHECK-NEXT: stg %r0, 30(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 38, i1 false) + ret void +} + +define void @fun39(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun39: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 31(%r2) +; CHECK-NEXT: stg %r0, 31(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 39, i1 false) + ret void +} + +define void @fun40(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun40: +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r0, 32(%r2) +; CHECK-NEXT: stg %r0, 32(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 40, i1 false) + ret void +} + +define void @fun41(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun41: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 25(%r2) +; CHECK-NEXT: vst %v0, 25(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 41, i1 false) + ret void 
+} + +define void @fun42(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun42: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 26(%r2) +; CHECK-NEXT: vst %v0, 26(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 42, i1 false) + ret void +} + +define void @fun43(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun43: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 27(%r2) +; CHECK-NEXT: vst %v0, 27(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 43, i1 false) + ret void +} + +define void @fun44(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun44: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 28(%r2) +; CHECK-NEXT: vst %v0, 28(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 44, i1 false) + ret void +} + +define void @fun45(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun45: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 29(%r2) +; CHECK-NEXT: vst %v0, 29(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 45, i1 false) + ret void +} + +define void @fun46(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun46: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 30(%r2) +; CHECK-NEXT: vst %v0, 30(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 
4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 46, i1 false) + ret void +} + +define void @fun47(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun47: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 31(%r2) +; CHECK-NEXT: vst %v0, 31(%r3) +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 47, i1 false) + ret void +} + +define void @fun48(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun48: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 32(%r2), 4 +; CHECK-NEXT: vst %v0, 32(%r3), 4 +; CHECK-NEXT: vl %v0, 16(%r2), 4 +; CHECK-NEXT: vst %v0, 16(%r3), 4 +; CHECK-NEXT: vl %v0, 0(%r2), 4 +; CHECK-NEXT: vst %v0, 0(%r3), 4 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 48, i1 false) + ret void +} + +define void @fun49(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: fun49: +; CHECK: # %bb.0: +; CHECK-NEXT: mvc 0(49,%r3), 0(%r2) +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 49, i1 false) + ret void +} Index: llvm/test/CodeGen/SystemZ/memset-08.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/memset-08.ll @@ -0,0 +1,1207 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Test memsets of small constant lengths, that should not be done with MVC. 
+; +; RUN: llc -mcpu=z15 -max-stores-memset=3 \ +; RUN: < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +define void @reg16(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg16: +; CHECK: # %bb.0: +; CHECK-NEXT: stc %r4, 0(%r3) +; CHECK-NEXT: mvc 1(15,%r3), 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 16, i1 false) + ret void +} + +define void @reg17(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg17: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: stc %r4, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 17, i1 false) + ret void +} + +define void @reg18(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg18: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: vsteh %v0, 16(%r3), 0 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 18, i1 false) + ret void +} + +define void @reg19(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg19: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vstef %v0, 15(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 19, i1 false) + ret void +} + +define void @reg20(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg20: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vstef %v0, 16(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 20, i1 false) + ret void +} + +define void @reg21(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg21: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 
13(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 21, i1 false) + ret void +} + +define void @reg22(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg22: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 14(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 22, i1 false) + ret void +} + +define void @reg23(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg23: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 15(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 23, i1 false) + ret void +} + +define void @reg24(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg24: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 16(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 24, i1 false) + ret void +} + +define void @reg25(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg25: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 9(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 25, i1 false) + ret void +} + +define void @reg26(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg26: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 10(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 26, i1 false) + ret void +} + +define void @reg27(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg27: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; 
CHECK-NEXT: vst %v0, 11(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 27, i1 false) + ret void +} + +define void @reg28(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg28: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 12(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 28, i1 false) + ret void +} + +define void @reg29(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg29: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 13(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 29, i1 false) + ret void +} + +define void @reg30(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg30: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 14(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 30, i1 false) + ret void +} + +define void @reg31(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg31: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 15(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 31, i1 false) + ret void +} + +define void @reg32(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg32: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 32, i1 false) + ret void +} + +define void @reg33(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg33: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 
7 +; CHECK-NEXT: stc %r4, 32(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 33, i1 false) + ret void +} + +define void @reg34(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg34: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vsteh %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 34, i1 false) + ret void +} + +define void @reg35(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg35: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vstef %v0, 31(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 35, i1 false) + ret void +} + +define void @reg36(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg36: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vstef %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 36, i1 false) + ret void +} + +define void @reg37(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg37: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 29(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 37, i1 false) + ret void +} + +define void @reg38(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg38: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 30(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void 
@llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 38, i1 false) + ret void +} + +define void @reg39(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg39: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 31(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 39, i1 false) + ret void +} + +define void @reg40(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg40: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vsteg %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 40, i1 false) + ret void +} + +define void @reg41(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg41: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 25(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 41, i1 false) + ret void +} + +define void @reg42(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg42: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 26(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 42, i1 false) + ret void +} + +define void @reg43(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg43: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 27(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 43, i1 false) + ret void +} + +define void @reg44(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg44: +; CHECK: 
# %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 28(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 44, i1 false) + ret void +} + +define void @reg45(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg45: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 29(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 45, i1 false) + ret void +} + +define void @reg46(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg46: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 30(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 46, i1 false) + ret void +} + +define void @reg47(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg47: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 31(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 47, i1 false) + ret void +} + +define void @reg48(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg48: +; CHECK: # %bb.0: +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vrepb %v0, %v0, 7 +; CHECK-NEXT: vst %v0, 32(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 48, i1 false) + ret void +} + +define void @reg49(i8* %Src, i8* %Dst, i8 %val) { +; CHECK-LABEL: reg49: +; CHECK: # %bb.0: +; CHECK-NEXT: stc %r4, 0(%r3) +; CHECK-NEXT: mvc 1(48,%r3), 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 %val, i64 49, i1 
false) + ret void +} + +; Same, but with an immediate. First all ones, which is a special case. + +define void @ones16(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones16: +; CHECK: # %bb.0: +; CHECK-NEXT: mvghi 8(%r3), -1 +; CHECK-NEXT: mvghi 0(%r3), -1 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 16, i1 false) + ret void +} + +define void @ones17(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones17: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvi 16(%r3), 255 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 17, i1 false) + ret void +} + +define void @ones18(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones18: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvhhi 16(%r3), -1 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 18, i1 false) + ret void +} + +define void @ones19(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones19: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vstef %v0, 15(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 19, i1 false) + ret void +} + +define void @ones20(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones20: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vstef %v0, 16(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 20, i1 false) + ret void +} + +define void @ones21(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones21: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 13(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 21, i1 false) + ret void +} + +define void @ones22(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones22: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 14(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; 
CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 22, i1 false) + ret void +} + +define void @ones23(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones23: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 15(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 23, i1 false) + ret void +} + +define void @ones24(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones24: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 16(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 24, i1 false) + ret void +} + +define void @ones25(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones25: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 9(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 25, i1 false) + ret void +} + +define void @ones26(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones26: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 10(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 26, i1 false) + ret void +} + +define void @ones27(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones27: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 11(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 27, i1 false) + ret void +} + +define void @ones28(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones28: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 12(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 28, i1 false) + ret void +} + +define void @ones29(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones29: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 13(%r3) +; 
CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 29, i1 false) + ret void +} + +define void @ones30(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones30: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 14(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 30, i1 false) + ret void +} + +define void @ones31(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones31: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 15(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 31, i1 false) + ret void +} + +define void @ones32(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones32: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 32, i1 false) + ret void +} + +define void @ones33(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones33: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvi 32(%r3), 255 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 33, i1 false) + ret void +} + +define void @ones34(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones34: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvhhi 32(%r3), -1 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 34, i1 false) + ret void +} + +define void @ones35(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones35: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vstef %v0, 31(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 35, i1 false) + ret void +} + +define void @ones36(i8* %Src, 
i8* %Dst) { +; CHECK-LABEL: ones36: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vstef %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 36, i1 false) + ret void +} + +define void @ones37(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones37: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 29(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 37, i1 false) + ret void +} + +define void @ones38(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones38: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 30(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 38, i1 false) + ret void +} + +define void @ones39(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones39: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 31(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 39, i1 false) + ret void +} + +define void @ones40(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones40: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vsteg %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 40, i1 false) + ret void +} + +define void @ones41(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones41: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 25(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 41, i1 false) + ret void +} + +define void @ones42(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones42: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 26(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 42, i1 false) + ret void +} + +define void @ones43(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones43: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 27(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 43, i1 false) + ret void +} + +define void @ones44(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones44: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 28(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 44, i1 false) + ret void +} + +define void @ones45(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones45: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 29(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 45, i1 false) + ret void +} + +define void @ones46(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones46: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 30(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 46, i1 false) + ret void +} + +define void @ones47(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones47: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 31(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 47, i1 false) + ret void +} + +define void @ones48(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones48: +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: vst %v0, 32(%r3) +; CHECK-NEXT: 
vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 48, i1 false) + ret void +} + +define void @ones49(i8* %Src, i8* %Dst) { +; CHECK-LABEL: ones49: +; CHECK: # %bb.0: +; CHECK-NEXT: mvi 0(%r3), 255 +; CHECK-NEXT: mvc 1(48,%r3), 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 -1, i64 49, i1 false) + ret void +} + +; Some, other immediate. + +define void @other16(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other16: +; CHECK: # %bb.0: +; CHECK-NEXT: mvi 0(%r3), 1 +; CHECK-NEXT: mvc 1(15,%r3), 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 16, i1 false) + ret void +} + +define void @other17(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other17: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvi 16(%r3), 1 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 17, i1 false) + ret void +} + +define void @other18(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other18: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvhhi 16(%r3), 257 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 18, i1 false) + ret void +} + +define void @other19(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other19: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vstef %v0, 15(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 19, i1 false) + ret void +} + +define void @other20(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other20: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vstef %v0, 16(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 20, i1 false) + ret void +} + +define void @other21(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other21: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: 
vsteg %v0, 13(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 21, i1 false) + ret void +} + +define void @other22(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other22: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 14(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 22, i1 false) + ret void +} + +define void @other23(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other23: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 15(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 23, i1 false) + ret void +} + +define void @other24(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other24: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 16(%r3), 0 +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 24, i1 false) + ret void +} + +define void @other25(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other25: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 9(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 25, i1 false) + ret void +} + +define void @other26(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other26: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 10(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 26, i1 false) + ret void +} + +define void @other27(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other27: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 11(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 27, i1 false) + ret void +} + +define void @other28(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other28: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 12(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 28, i1 false) + ret void +} + +define void @other29(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other29: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 13(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 29, i1 false) + ret void +} + +define void @other30(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other30: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 14(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 30, i1 false) + ret void +} + +define void @other31(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other31: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 15(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 31, i1 false) + ret void +} + +define void @other32(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 32, i1 false) + ret void +} + +define void @other33(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other33: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvi 32(%r3), 1 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 33, i1 false) + ret void +} + +define void @other34(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other34: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: mvhhi 32(%r3), 257 +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 34, i1 false) + ret void +} + +define void @other35(i8* 
%Src, i8* %Dst) { +; CHECK-LABEL: other35: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vstef %v0, 31(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 35, i1 false) + ret void +} + +define void @other36(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other36: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vstef %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 36, i1 false) + ret void +} + +define void @other37(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other37: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 29(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 37, i1 false) + ret void +} + +define void @other38(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other38: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 30(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 38, i1 false) + ret void +} + +define void @other39(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other39: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 31(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 39, i1 false) + ret void +} + +define void @other40(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other40: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsteg %v0, 32(%r3), 0 +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 40, i1 false) + ret void +} + +define void @other41(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other41: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 25(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 41, i1 false) + ret void +} + +define void @other42(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other42: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 26(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 42, i1 false) + ret void +} + +define void @other43(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other43: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 27(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 43, i1 false) + ret void +} + +define void @other44(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other44: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 28(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 44, i1 false) + ret void +} + +define void @other45(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other45: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 29(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 45, i1 false) + ret void +} + +define void @other46(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other46: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 30(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 46, i1 false) + ret void +} + +define void @other47(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other47: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 31(%r3) +; CHECK-NEXT: 
vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 47, i1 false) + ret void +} + +define void @other48(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other48: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vst %v0, 32(%r3) +; CHECK-NEXT: vst %v0, 16(%r3) +; CHECK-NEXT: vst %v0, 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 48, i1 false) + ret void +} + +define void @other49(i8* %Src, i8* %Dst) { +; CHECK-LABEL: other49: +; CHECK: # %bb.0: +; CHECK-NEXT: mvi 0(%r3), 1 +; CHECK-NEXT: mvc 1(48,%r3), 0(%r3) +; CHECK-NEXT: br %r14 + call void @llvm.memset.p0i8.i64(i8* %Dst, i8 1, i64 49, i1 false) + ret void +}