diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -452,6 +452,12 @@ LDP, STP, STNP, + + // Memory Operations + MOPS_MEMSET, + MOPS_MEMSET_TAGGING, + MOPS_MEMCOPY, + MOPS_MEMMOVE, }; } // end namespace AArch64ISD @@ -889,6 +895,7 @@ SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" @@ -938,19 +939,20 @@ // In case of strict alignment, avoid an excessive number of byte wide stores. MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemset = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemsetOptSize : 32; + MaxStoresPerMemset = + Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32; MaxGluedStoresPerMemcpy = 4; MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemcpyOptSize : 16; + MaxStoresPerMemcpy = + Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16; - MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxStoresPerMemmoveOptSize = 4; + MaxStoresPerMemmove = 4; MaxLoadsPerMemcmpOptSize = 4; - MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() - ? MaxLoadsPerMemcmpOptSize : 8; + MaxLoadsPerMemcmp = + Subtarget->requiresStrictAlign() ? 
MaxLoadsPerMemcmpOptSize : 8; setStackPointerRegisterToSaveRestore(AArch64::SP); @@ -1426,6 +1428,11 @@ setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); } + if (Subtarget->hasMOPS() && Subtarget->hasMTE()) { + // Only required for llvm.aarch64.mops.memset.tag + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + } + PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } @@ -2267,6 +2274,10 @@ MAKE_CASE(AArch64ISD::UADDLP) MAKE_CASE(AArch64ISD::CALL_RVMARKER) MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) + MAKE_CASE(AArch64ISD::MOPS_MEMSET) + MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING) + MAKE_CASE(AArch64ISD::MOPS_MEMCOPY) + MAKE_CASE(AArch64ISD::MOPS_MEMMOVE) } #undef MAKE_CASE return nullptr; @@ -4059,6 +4070,39 @@ return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret); } +SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::aarch64_mops_memset_tag: { + auto Node = cast(Op.getNode()); + SDLoc DL(Op); + SDValue Chain = Node->getChain(); + SDValue Dst = Op.getOperand(2); + SDValue Val = Op.getOperand(3); + Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64); + SDValue Size = Op.getOperand(4); + auto Alignment = Node->getMemOperand()->getAlign(); + bool IsVol = Node->isVolatile(); + auto DstPtrInfo = Node->getPointerInfo(); + + const auto &SDI = + static_cast(DAG.getSelectionDAGInfo()); + SDValue MS = + SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val, + Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{}); + + // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the + // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise + // LowerOperationWrapper will complain that the number of results has + // changed. 
+ return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL); + } + } +} + SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); @@ -5126,6 +5170,8 @@ case ISD::MULHU: return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED, /*OverrideNEON=*/true); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::ATOMIC_STORE: @@ -11879,6 +11925,20 @@ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal; return true; } + case Intrinsic::aarch64_mops_memset_tag: { + Value *Dst = I.getArgOperand(0); + Value *Val = I.getArgOperand(1); + PointerType *PtrTy = cast(Dst->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(Val->getType()); + Info.ptrVal = Dst; + Info.offset = 0; + Info.align = DL.getABITypeAlign(PtrTy->getElementType()); + Info.flags = MachineMemOperand::MOStore; + // The size of the memory being operated on is unknown at this point + Info.size = MemoryLocation::UnknownSize; + return true; + } default: break; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8362,6 +8362,14 @@ } } +// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain +// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain +def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; +def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; +def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; +def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; +def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; + let Predicates = [HasMOPS], Defs = 
[NZCV], Size = 12, mayStore = 1 in { let mayLoad = 1 in { def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -19,11 +19,30 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { public: + SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, + const SDLoc &DL, SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, Align Alignment, + bool isVolatile, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, MachinePointerInfo DstPtrInfo, diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -15,15 +15,95 @@ #define DEBUG_TYPE "aarch64-selectiondag-info" +SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, + SelectionDAG &DAG, const 
SDLoc &DL, + SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + + // Constant size of the copy/set for the memory operands; ~0 means unknown. + uint64_t ConstSize = ~UINT64_C(0); + if (auto *C = dyn_cast(Size)) + ConstSize = C->getZExtValue(); + + const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET || + SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING; + + const auto MachineOpcode = [&]() { + switch (SDOpcode) { + case AArch64ISD::MOPS_MEMSET: + return AArch64::MOPSMemorySetPseudo; + case AArch64ISD::MOPS_MEMSET_TAGGING: + return AArch64::MOPSMemorySetTaggingPseudo; + case AArch64ISD::MOPS_MEMCOPY: + return AArch64::MOPSMemoryCopyPseudo; + case AArch64ISD::MOPS_MEMMOVE: + return AArch64::MOPSMemoryMovePseudo; + default: + llvm_unreachable("Unhandled MOPS ISD Opcode"); + } + }(); + + MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (!IsSet) + Flags |= MachineMemOperand::MOLoad; + + MachineFunction &MF = DAG.getMachineFunction(); + + auto *DstOp = + MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment); + auto *SrcOp = + MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment); + + if (IsSet) { + // Extend value to i64 if required + if (SrcOrValue.getValueType() != MVT::i64) + SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue); + SDValue Ops[] = {Dst, Size, SrcOrValue, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp}); + return SDValue(Node, 2); + } else { + SDValue Ops[] = {Dst, SrcOrValue, Size, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp, SrcOp}); + return SDValue(Node, 3); + } +} + +SDValue 
AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + if (STI.hasMOPS()) + return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + return SDValue(); +} + SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); + } + // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); ConstantSDNode *SizeValue = dyn_cast(Size); - const AArch64Subtarget &STI = - DAG.getMachineFunction().getSubtarget(); const char *bzeroName = (V && V->isZero()) ? 
DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) @@ -55,6 +135,19 @@ return SDValue(); } +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + } + return SDValue(); +} + static const int kSetTagLoopThreshold = 176; static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops | FileCheck %s --check-prefix=CHECK-MOPS + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +declare void @fn(i8*, i8*) + +define void @consecutive() { +; CHECK-MOPS-LABEL: consecutive: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-MOPS-NEXT: sub sp, sp, #2016 +; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-MOPS-NEXT: .cfi_offset w30, -8 +; CHECK-MOPS-NEXT: .cfi_offset w29, -16 +; CHECK-MOPS-NEXT: mov w8, #1000 +; CHECK-MOPS-NEXT: add x9, sp, #8 +; CHECK-MOPS-NEXT: adrp x10, .LCPI0_0 +; CHECK-MOPS-NEXT: adrp x11, .LCPI0_1 +; CHECK-MOPS-NEXT: mov w12, #6424 +; CHECK-MOPS-NEXT: mov w13, #7452 +; CHECK-MOPS-NEXT: setp [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: setm [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: sete [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: movk w12, #6938, lsl #16 +; CHECK-MOPS-NEXT: ldr q0, [x10, :lo12:.LCPI0_0] +; CHECK-MOPS-NEXT: mov w8, #30 +; CHECK-MOPS-NEXT: ldr d1, [x11, :lo12:.LCPI0_1] +; CHECK-MOPS-NEXT: add x0, sp, #1008 +; CHECK-MOPS-NEXT: add x1, sp, #8 +; CHECK-MOPS-NEXT: str w12, [sp, #1032] +; CHECK-MOPS-NEXT: strh w13, [sp, #1036] +; CHECK-MOPS-NEXT: str q0, [sp, #1008] +; CHECK-MOPS-NEXT: str d1, [sp, #1024] +; CHECK-MOPS-NEXT: strb w8, [sp, #1038] +; CHECK-MOPS-NEXT: bl fn +; CHECK-MOPS-NEXT: add sp, sp, #2016 +; CHECK-MOPS-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-MOPS-NEXT: ret +entry: + %buf_from = alloca [1000 x i8], align 16 + %buf_to = alloca [1000 x i8], align 1 + %0 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 0 + %1 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_to, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(1000) %1, i8 0, i64 1000, i1 false) + %2 = bitcast [1000 x i8]* %buf_from to <16 x i8>* + store <16 x i8> , <16 x i8>* %2, align 16 + %arrayidx.16 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 16 + %3 = bitcast i8* %arrayidx.16 to <8 x i8>* + store <8 x i8> , <8 x i8>* %3, align 16 + %arrayidx.24 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 24 + store i8 24, i8* %arrayidx.24, align 8 + %arrayidx.25 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 25 
+ store i8 25, i8* %arrayidx.25, align 1 + %arrayidx.26 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 26 + store i8 26, i8* %arrayidx.26, align 2 + %arrayidx.27 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 27 + store i8 27, i8* %arrayidx.27, align 1 + %arrayidx.28 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 28 + store i8 28, i8* %arrayidx.28, align 4 + %arrayidx.29 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 29 + store i8 29, i8* %arrayidx.29, align 1 + %arrayidx.30 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 30 + store i8 30, i8* %arrayidx.30, align 2 + call void @fn(i8* nonnull %0, i8* nonnull %1) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll @@ -2,8 +2,8 @@ ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel-O0 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops,+mte | FileCheck %s --check-prefix=SDAG -; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64) define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) { @@ -15,6 +15,14 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x8 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_0_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov x8, xzr +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_0_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov x8, xzr @@ -38,6 +46,14 @@ ; 
GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_1_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #1 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_1_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov w8, #1 @@ -61,6 +77,14 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_10_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov w8, #10 @@ -84,6 +108,14 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_10000_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10000 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10000_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov w8, #10000 @@ -105,6 +137,13 @@ ; GISel-O0-NEXT: setge [x0]!, x1!, x8 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_size_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: setgp [x0]!, x1!, xzr +; SDAG-NEXT: setgm [x0]!, x1!, xzr +; SDAG-NEXT: setge [x0]!, x1!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_size_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: setgp [x0]!, x1!, xzr @@ -127,6 +166,15 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_0: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov x8, xzr +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_0: ; GISel: // %bb.0: // %entry ; 
GISel-NEXT: mov x8, xzr @@ -153,6 +201,15 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_1: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #1 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_1: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov w8, #1 @@ -179,6 +236,15 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_10: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov w8, #10 @@ -205,6 +271,15 @@ ; GISel-O0-NEXT: setge [x0]!, x8!, x9 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_10000: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10000 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10000: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov w8, #10000 @@ -229,6 +304,14 @@ ; GISel-O0-NEXT: setge [x0]!, x1!, x8 ; GISel-O0-NEXT: ret ; +; SDAG-LABEL: memset_tagged_size: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x1!, x2 +; SDAG-NEXT: setgm [x0]!, x1!, x2 +; SDAG-NEXT: setge [x0]!, x1!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_size: ; GISel: // %bb.0: // %entry ; GISel-NEXT: // kill: def $w2 killed $w2 def $x2 diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -4,6 +4,8 @@ ; RUN: llc %s -o - 
-mtriple=aarch64-arm-none-eabi -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=GISel-WITHOUT-MOPS,GISel-WITHOUT-MOPS-O3 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O0 ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -global-isel=1 -global-isel-abort=1 -mattr=+mops | FileCheck %s --check-prefixes=GISel-MOPS,GISel-MOPS-O3 +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 | FileCheck %s --check-prefix=SDAG-WITHOUT-MOPS-O2 +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops | FileCheck %s --check-prefix=SDAG-MOPS-O2 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) @@ -21,6 +23,14 @@ ; GISel-MOPS-LABEL: memset_0_zeroval: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0_zeroval: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_0_zeroval: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false) ret void @@ -34,6 +44,14 @@ ; GISel-MOPS-LABEL: memset_0_zeroval_volatile: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0_zeroval_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_0_zeroval_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true) ret void @@ -51,6 +69,18 @@ ; GISel-MOPS-NEXT: str xzr, [x0] ; GISel-MOPS-NEXT: strh wzr, [x0, #8] ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: strh wzr, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str xzr, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret 
+; +; SDAG-MOPS-O2-LABEL: memset_10_zeroval: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: strh wzr, [x0, #8] +; SDAG-MOPS-O2-NEXT: str xzr, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 false) ret void @@ -97,6 +127,18 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: strh wzr, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str xzr, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10_zeroval_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: strh wzr, [x0, #8] +; SDAG-MOPS-O2-NEXT: str xzr, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 true) ret void @@ -143,6 +185,25 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000_zeroval: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, wzr +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10000_zeroval: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, xzr +; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, xzr +; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, xzr +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 false) ret void @@ -189,6 +250,25 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000_zeroval_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, wzr +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10000_zeroval_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, xzr +; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, xzr +; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, xzr +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 true) ret void @@ -220,6 +300,24 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x1!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x1!, xzr ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size_zeroval: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x2, x1 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, wzr +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_size_zeroval: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: setp [x0]!, x1!, xzr +; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, xzr +; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, xzr +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 false) ret void @@ -251,11 +349,30 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x1!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x1!, xzr ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size_zeroval_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x2, x1 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, wzr +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_size_zeroval_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: setp [x0]!, x1!, xzr +; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, xzr +; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, xzr +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 true) ret void } + define void @memset_0(i8* %dst, i32 %value) { ; GISel-WITHOUT-MOPS-LABEL: memset_0: ; GISel-WITHOUT-MOPS: // %bb.0: // %entry @@ -264,6 +381,14 @@ ; GISel-MOPS-LABEL: memset_0: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // 
%entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_0: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false) @@ -278,6 +403,14 @@ ; GISel-MOPS-LABEL: memset_0_volatile: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_0_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_0_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true) @@ -328,6 +461,26 @@ ; GISel-MOPS-O3-NEXT: str x8, [x0] ; GISel-MOPS-O3-NEXT: strh w8, [x0, #8] ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-WITHOUT-MOPS-O2-NEXT: and x9, x1, #0xff +; SDAG-WITHOUT-MOPS-O2-NEXT: mul x8, x9, x8 +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SDAG-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-MOPS-O2-NEXT: and x9, x1, #0xff +; SDAG-MOPS-O2-NEXT: mul x8, x9, x8 +; SDAG-MOPS-O2-NEXT: str x8, [x0] +; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 false) @@ -375,6 +528,26 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // 
%entry +; SDAG-WITHOUT-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-WITHOUT-MOPS-O2-NEXT: and x9, x1, #0xff +; SDAG-WITHOUT-MOPS-O2-NEXT: mul x8, x9, x8 +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SDAG-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-MOPS-O2-NEXT: and x9, x1, #0xff +; SDAG-MOPS-O2-NEXT: mul x8, x9, x8 +; SDAG-MOPS-O2-NEXT: str x8, [x0] +; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 true) @@ -422,6 +595,25 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10000: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, x1 +; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, x1 +; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, x1 +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 false) @@ -469,6 +661,25 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10000_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_10000_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, x1 +; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, x1 +; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, x1 +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 true) @@ -518,6 +729,26 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x1!, x2 ; GISel-MOPS-O3-NEXT: sete [x0]!, x1!, x2 ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, x1 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, w2 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x2, x8 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_size: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-MOPS-O2-NEXT: setp [x0]!, x1!, x2 +; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, x2 +; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, x2 +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 false) @@ -567,12 +798,33 @@ ; GISel-MOPS-O3-NEXT: setm [x0]!, x1!, x2 ; GISel-MOPS-O3-NEXT: sete [x0]!, x1!, x2 ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memset_size_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, x1 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, w2 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x2, x8 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memset_size_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-MOPS-O2-NEXT: setp [x0]!, x1!, x2 +; SDAG-MOPS-O2-NEXT: setm [x0]!, x1!, x2 +; SDAG-MOPS-O2-NEXT: sete [x0]!, x1!, x2 +; SDAG-MOPS-O2-NEXT: ret entry: %value_trunc = trunc i32 %value to i8 call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 true) ret void } + define void @memcpy_0(i8* %dst, i8* %src, i32 %value) { ; GISel-WITHOUT-MOPS-LABEL: memcpy_0: ; GISel-WITHOUT-MOPS: // %bb.0: // %entry @@ -581,6 +833,14 @@ ; GISel-MOPS-LABEL: memcpy_0: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_0: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_0: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) ret void @@ -594,6 +854,14 @@ ; GISel-MOPS-LABEL: memcpy_0_volatile: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_0_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_0_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) ret void @@ -615,6 +883,22 @@ ; GISel-MOPS-NEXT: ldrh w8, [x1, #8] ; GISel-MOPS-NEXT: strh w8, [x0, #8] ; GISel-MOPS-NEXT: ret +; +; 
SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrh w8, [x1, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x9, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x9, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_10: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldrh w8, [x1, #8] +; SDAG-MOPS-O2-NEXT: ldr x9, [x1] +; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-MOPS-O2-NEXT: str x9, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) ret void @@ -658,6 +942,22 @@ ; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x8, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrh w9, [x1, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w9, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_10_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldr x8, [x1] +; SDAG-MOPS-O2-NEXT: ldrh w9, [x1, #8] +; SDAG-MOPS-O2-NEXT: strh w9, [x0, #8] +; SDAG-MOPS-O2-NEXT: str x8, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) ret void @@ -701,6 +1001,24 @@ ; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_1000: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memcpy +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_1000: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false) ret void @@ -744,6 +1062,24 @@ ; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_1000_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memcpy +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_1000_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true) ret void @@ -765,6 +1101,22 @@ ; GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_n: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memcpy +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_n: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false) ret void @@ -786,11 +1138,28 @@ ; GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_n_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memcpy +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_n_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x2! 
+; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true) ret void } + define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) { ; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0: ; GISel-WITHOUT-MOPS: // %bb.0: // %entry @@ -799,6 +1168,14 @@ ; GISel-MOPS-LABEL: memcpy_inline_0: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_0: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_0: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) ret void @@ -812,6 +1189,14 @@ ; GISel-MOPS-LABEL: memcpy_inline_0_volatile: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_0_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_0_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) ret void @@ -833,6 +1218,22 @@ ; GISel-MOPS-NEXT: ldrh w8, [x1, #8] ; GISel-MOPS-NEXT: strh w8, [x0, #8] ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_10: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrh w8, [x1, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x9, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x9, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_10: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldrh w8, [x1, #8] +; SDAG-MOPS-O2-NEXT: ldr x9, [x1] +; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-MOPS-O2-NEXT: str x9, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 
%dst, i8* align 1 %src, i64 10, i1 false) ret void @@ -854,11 +1255,389 @@ ; GISel-MOPS-NEXT: ldrh w8, [x1, #8] ; GISel-MOPS-NEXT: strh w8, [x0, #8] ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_10_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x8, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrh w9, [x1, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w9, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_10_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldr x8, [x1] +; SDAG-MOPS-O2-NEXT: ldrh w9, [x1, #8] +; SDAG-MOPS-O2-NEXT: strh w9, [x0, #8] +; SDAG-MOPS-O2-NEXT: str x8, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) ret void } +define void @memcpy_inline_300(i8* %dst, i8* %src, i32 %value) { +; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_300: +; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #64] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #64] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #80] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #80] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #96] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #96] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #112] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #112] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #128] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #128] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #144] +; GISel-WITHOUT-MOPS-O0-NEXT: str 
q0, [x0, #144] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #160] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #160] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #176] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #176] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #192] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #192] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #208] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #208] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #224] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #224] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #240] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #240] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #256] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #256] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #272] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #272] +; GISel-WITHOUT-MOPS-O0-NEXT: add x8, x1, #284 +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x8] +; GISel-WITHOUT-MOPS-O0-NEXT: add x8, x0, #284 +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x8] +; GISel-WITHOUT-MOPS-O0-NEXT: ret +; +; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_300: +; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O3-NEXT: add x8, x1, #284 +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #64] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #64] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #80] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #80] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #96] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #96] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #112] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #112] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, 
[x1, #128] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #128] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #144] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #144] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #160] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #160] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #176] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #176] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #192] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #192] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #208] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #208] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #224] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #224] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #240] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #240] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #256] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #256] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #272] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #272] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x8] +; GISel-WITHOUT-MOPS-O3-NEXT: add x8, x0, #284 +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x8] +; GISel-WITHOUT-MOPS-O3-NEXT: ret +; +; GISel-MOPS-O0-LABEL: memcpy_inline_300: +; GISel-MOPS-O0: // %bb.0: // %entry +; GISel-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-MOPS-O0-NEXT: str q0, [x0] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #48] +; GISel-MOPS-O0-NEXT: str q0, [x0, #48] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #64] +; GISel-MOPS-O0-NEXT: str q0, [x0, #64] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #80] +; GISel-MOPS-O0-NEXT: str q0, [x0, #80] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #96] +; GISel-MOPS-O0-NEXT: str q0, [x0, #96] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #112] +; GISel-MOPS-O0-NEXT: str q0, [x0, #112] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #128] +; GISel-MOPS-O0-NEXT: str q0, [x0, #128] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #144] +; 
GISel-MOPS-O0-NEXT: str q0, [x0, #144] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #160] +; GISel-MOPS-O0-NEXT: str q0, [x0, #160] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #176] +; GISel-MOPS-O0-NEXT: str q0, [x0, #176] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #192] +; GISel-MOPS-O0-NEXT: str q0, [x0, #192] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #208] +; GISel-MOPS-O0-NEXT: str q0, [x0, #208] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #224] +; GISel-MOPS-O0-NEXT: str q0, [x0, #224] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #240] +; GISel-MOPS-O0-NEXT: str q0, [x0, #240] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #256] +; GISel-MOPS-O0-NEXT: str q0, [x0, #256] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #272] +; GISel-MOPS-O0-NEXT: str q0, [x0, #272] +; GISel-MOPS-O0-NEXT: add x8, x1, #284 +; GISel-MOPS-O0-NEXT: ldr q0, [x8] +; GISel-MOPS-O0-NEXT: add x8, x0, #284 +; GISel-MOPS-O0-NEXT: str q0, [x8] +; GISel-MOPS-O0-NEXT: ret +; +; GISel-MOPS-O3-LABEL: memcpy_inline_300: +; GISel-MOPS-O3: // %bb.0: // %entry +; GISel-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-MOPS-O3-NEXT: add x8, x1, #284 +; GISel-MOPS-O3-NEXT: str q0, [x0] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #48] +; GISel-MOPS-O3-NEXT: str q0, [x0, #48] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #64] +; GISel-MOPS-O3-NEXT: str q0, [x0, #64] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #80] +; GISel-MOPS-O3-NEXT: str q0, [x0, #80] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #96] +; GISel-MOPS-O3-NEXT: str q0, [x0, #96] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #112] +; GISel-MOPS-O3-NEXT: str q0, [x0, #112] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #128] +; GISel-MOPS-O3-NEXT: str q0, [x0, #128] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #144] +; GISel-MOPS-O3-NEXT: str q0, [x0, #144] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #160] +; GISel-MOPS-O3-NEXT: str q0, [x0, #160] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #176] +; GISel-MOPS-O3-NEXT: str q0, [x0, #176] 
+; GISel-MOPS-O3-NEXT: ldr q0, [x1, #192] +; GISel-MOPS-O3-NEXT: str q0, [x0, #192] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #208] +; GISel-MOPS-O3-NEXT: str q0, [x0, #208] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #224] +; GISel-MOPS-O3-NEXT: str q0, [x0, #224] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #240] +; GISel-MOPS-O3-NEXT: str q0, [x0, #240] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #256] +; GISel-MOPS-O3-NEXT: str q0, [x0, #256] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #272] +; GISel-MOPS-O3-NEXT: str q0, [x0, #272] +; GISel-MOPS-O3-NEXT: ldr q0, [x8] +; GISel-MOPS-O3-NEXT: add x8, x0, #284 +; GISel-MOPS-O3-NEXT: str q0, [x8] +; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x1, #284 +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #80] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q2, [x1, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #80] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q2, [x0, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #144] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q2, [x1, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #144] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q2, [x0, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #208] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q2, [x1, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #208] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q2, [x0, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q1, [x1, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x8] +; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x0, #284 +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x8] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q1, [x0, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; 
SDAG-MOPS-O2-LABEL: memcpy_inline_300: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #300 +; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 300, i1 false) + ret void +} + +define void @memcpy_inline_300_volatile(i8* %dst, i8* %src, i32 %value) { +; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_300_volatile: +; GISel-WITHOUT-MOPS: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #64] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #64] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #80] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #80] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #96] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #96] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #112] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #112] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #128] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #128] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #144] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #144] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #160] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #160] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #176] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #176] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #192] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #192] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #208] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #208] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #224] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #224] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, 
[x1, #240] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #240] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #256] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #256] +; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #272] +; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #272] +; GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1, #288] +; GISel-WITHOUT-MOPS-NEXT: str x8, [x0, #288] +; GISel-WITHOUT-MOPS-NEXT: ldr w8, [x1, #296] +; GISel-WITHOUT-MOPS-NEXT: str w8, [x0, #296] +; GISel-WITHOUT-MOPS-NEXT: ret +; +; GISel-MOPS-LABEL: memcpy_inline_300_volatile: +; GISel-MOPS: // %bb.0: // %entry +; GISel-MOPS-NEXT: ldr q0, [x1] +; GISel-MOPS-NEXT: str q0, [x0] +; GISel-MOPS-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-NEXT: str q0, [x0, #16] +; GISel-MOPS-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-NEXT: str q0, [x0, #32] +; GISel-MOPS-NEXT: ldr q0, [x1, #48] +; GISel-MOPS-NEXT: str q0, [x0, #48] +; GISel-MOPS-NEXT: ldr q0, [x1, #64] +; GISel-MOPS-NEXT: str q0, [x0, #64] +; GISel-MOPS-NEXT: ldr q0, [x1, #80] +; GISel-MOPS-NEXT: str q0, [x0, #80] +; GISel-MOPS-NEXT: ldr q0, [x1, #96] +; GISel-MOPS-NEXT: str q0, [x0, #96] +; GISel-MOPS-NEXT: ldr q0, [x1, #112] +; GISel-MOPS-NEXT: str q0, [x0, #112] +; GISel-MOPS-NEXT: ldr q0, [x1, #128] +; GISel-MOPS-NEXT: str q0, [x0, #128] +; GISel-MOPS-NEXT: ldr q0, [x1, #144] +; GISel-MOPS-NEXT: str q0, [x0, #144] +; GISel-MOPS-NEXT: ldr q0, [x1, #160] +; GISel-MOPS-NEXT: str q0, [x0, #160] +; GISel-MOPS-NEXT: ldr q0, [x1, #176] +; GISel-MOPS-NEXT: str q0, [x0, #176] +; GISel-MOPS-NEXT: ldr q0, [x1, #192] +; GISel-MOPS-NEXT: str q0, [x0, #192] +; GISel-MOPS-NEXT: ldr q0, [x1, #208] +; GISel-MOPS-NEXT: str q0, [x0, #208] +; GISel-MOPS-NEXT: ldr q0, [x1, #224] +; GISel-MOPS-NEXT: str q0, [x0, #224] +; GISel-MOPS-NEXT: ldr q0, [x1, #240] +; GISel-MOPS-NEXT: str q0, [x0, #240] +; GISel-MOPS-NEXT: ldr q0, [x1, #256] +; GISel-MOPS-NEXT: str q0, [x0, #256] +; GISel-MOPS-NEXT: ldr q0, [x1, #272] +; GISel-MOPS-NEXT: str q0, [x0, #272] +; GISel-MOPS-NEXT: ldr x8, [x1, #288] +; GISel-MOPS-NEXT: str x8, 
[x0, #288] +; GISel-MOPS-NEXT: ldr w8, [x1, #296] +; GISel-MOPS-NEXT: str w8, [x0, #296] +; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #80] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #96] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #96] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #80] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #128] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #144] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #160] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #160] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #144] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #128] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #192] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #208] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #224] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #224] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #208] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #192] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #272] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x8, [x1, #288] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr w9, [x1, #296] +; SDAG-WITHOUT-MOPS-O2-NEXT: str w9, 
[x0, #296] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0, #288] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #272] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_300_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #300 +; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 300, i1 true) + ret void +} + define void @memmove_0(i8* %dst, i8* %src, i32 %value) { ; GISel-WITHOUT-MOPS-LABEL: memmove_0: ; GISel-WITHOUT-MOPS: // %bb.0: // %entry @@ -867,6 +1646,14 @@ ; GISel-MOPS-LABEL: memmove_0: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_0: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_0: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) ret void @@ -880,6 +1667,14 @@ ; GISel-MOPS-LABEL: memmove_0_volatile: ; GISel-MOPS: // %bb.0: // %entry ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_0_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_0_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) ret void @@ -917,6 +1712,22 @@ ; GISel-MOPS-O3-NEXT: str x8, [x0] ; GISel-MOPS-O3-NEXT: strh w9, [x0, #8] ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrh w8, [x1, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x9, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w8, 
[x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x9, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_10: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldrh w8, [x1, #8] +; SDAG-MOPS-O2-NEXT: ldr x9, [x1] +; SDAG-MOPS-O2-NEXT: strh w8, [x0, #8] +; SDAG-MOPS-O2-NEXT: str x9, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) ret void @@ -960,6 +1771,22 @@ ; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x8, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrh w9, [x1, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: strh w9, [x0, #8] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_10_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldr x8, [x1] +; SDAG-MOPS-O2-NEXT: ldrh w9, [x1, #8] +; SDAG-MOPS-O2-NEXT: strh w9, [x0, #8] +; SDAG-MOPS-O2-NEXT: str x8, [x0] +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) ret void @@ -1003,6 +1830,24 @@ ; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_1000: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memmove +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_1000: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: cpyp [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false) ret void @@ -1046,6 +1891,24 @@ ; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_1000_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memmove +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_1000_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: cpyp [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x8! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true) ret void @@ -1067,6 +1930,22 @@ ; GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_n: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memmove +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_n: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: cpyp [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false) ret void @@ -1088,6 +1967,22 @@ ; GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x2! ; GISel-MOPS-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_n_volatile: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 +; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 +; SDAG-WITHOUT-MOPS-O2-NEXT: bl memmove +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memmove_n_volatile: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: cpyp [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x2! +; SDAG-MOPS-O2-NEXT: ret entry: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true) ret void