diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -450,6 +450,12 @@ LDP, STP, STNP, + + // Memory Operations + MOPS_MEMSET, + MOPS_MEMSET_TAGGING, + MOPS_MEMCOPY, + MOPS_MEMMOVE, }; } // end namespace AArch64ISD @@ -887,6 +893,7 @@ SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -936,21 +937,26 @@ setTargetDAGCombine(ISD::GlobalAddress); - // In case of strict alignment, avoid an excessive number of byte wide stores. + // FIXME: right now we are being conservative on when it is best to use + // MOPS instructions instead of ldr/str sequence(s), i.e. we are emitting + // the MOPS instructions for any time we used to emit a call to mem* + // functions. These threshold values should be updated once we have further + // evaluated the performance differences. MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemset = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemsetOptSize : 32; + MaxStoresPerMemset = + Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32; MaxGluedStoresPerMemcpy = 4; MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() - ? 
MaxStoresPerMemcpyOptSize : 16; + MaxStoresPerMemcpy = + Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16; - MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxStoresPerMemmoveOptSize = 4; + MaxStoresPerMemmove = 4; MaxLoadsPerMemcmpOptSize = 4; - MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() - ? MaxLoadsPerMemcmpOptSize : 8; + MaxLoadsPerMemcmp = + Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8; setStackPointerRegisterToSaveRestore(AArch64::SP); @@ -1423,6 +1429,11 @@ setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); } + if (Subtarget->hasMOPS() && Subtarget->hasMTE()) { + // Only required for llvm.aarch64.mops.memset.tag + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + } + PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } @@ -2262,6 +2273,10 @@ MAKE_CASE(AArch64ISD::UADDLP) MAKE_CASE(AArch64ISD::CALL_RVMARKER) MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) + MAKE_CASE(AArch64ISD::MOPS_MEMSET) + MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING) + MAKE_CASE(AArch64ISD::MOPS_MEMCOPY) + MAKE_CASE(AArch64ISD::MOPS_MEMMOVE) } #undef MAKE_CASE return nullptr; @@ -4050,6 +4065,39 @@ return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret); } +SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. 
+ case Intrinsic::aarch64_mops_memset_tag: { + auto Node = cast<MemIntrinsicSDNode>(Op.getNode()); + SDLoc DL(Op); + SDValue Chain = Node->getChain(); + SDValue Dst = Op.getOperand(2); + SDValue Val = Op.getOperand(3); + Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64); + SDValue Size = Op.getOperand(4); + auto Alignment = Node->getMemOperand()->getAlign(); + bool IsVol = Node->isVolatile(); + auto DstPtrInfo = Node->getPointerInfo(); + + const auto &SDI = + static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo()); + SDValue MS = + SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val, + Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{}); + + // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the + // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise + // LowerOperationWrapper will complain that the number of results has + // changed. + return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL); + } + } +} + SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -5117,6 +5165,8 @@ case ISD::MULHU: return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED, /*OverrideNEON=*/true); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::ATOMIC_STORE: @@ -11837,6 +11887,20 @@ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal; return true; } + case Intrinsic::aarch64_mops_memset_tag: { + Value *Dst = I.getArgOperand(0); + Value *Val = I.getArgOperand(1); + PointerType *PtrTy = cast<PointerType>(Dst->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(Val->getType()); + Info.ptrVal = Dst; + Info.offset = 0; + Info.align = DL.getABITypeAlign(PtrTy->getElementType()); + Info.flags = MachineMemOperand::MOStore; + // The size of the memory being operated on is unknown at this point + Info.size = MemoryLocation::UnknownSize; +
return true; + } default: break; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8353,6 +8353,14 @@ defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; } +// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain +// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain +def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; +def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; +def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; +def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; +def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; + let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { let mayLoad = 1 in { def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -19,11 +19,30 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { public: + SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, + const SDLoc &DL, SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, Align Alignment, + bool isVolatile, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, 
SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, MachinePointerInfo DstPtrInfo, diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -15,15 +15,97 @@ #define DEBUG_TYPE "aarch64-selectiondag-info" +SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, + SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + + // Get the constant size of the copy/set. 
+ uint64_t ConstSize = 0; + if (auto *C = dyn_cast<ConstantSDNode>(Size)) + ConstSize = C->getZExtValue(); + + const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET || + SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING; + + const auto MachineOpcode = [&]() { + switch (SDOpcode) { + case AArch64ISD::MOPS_MEMSET: + return AArch64::MOPSMemorySetPseudo; + case AArch64ISD::MOPS_MEMSET_TAGGING: + return AArch64::MOPSMemorySetTaggingPseudo; + case AArch64ISD::MOPS_MEMCOPY: + return AArch64::MOPSMemoryCopyPseudo; + case AArch64ISD::MOPS_MEMMOVE: + return AArch64::MOPSMemoryMovePseudo; + default: + llvm_unreachable("Unhandled MOPS ISD Opcode"); + } + }(); + + MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (!IsSet) + Flags |= MachineMemOperand::MOLoad; + + MachineFunction &MF = DAG.getMachineFunction(); + + auto *DstOp = + MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment); + auto *SrcOp = + MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment); + + // Extend i8 value to i64 if required + if (SrcOrValue.getValueType().getSimpleVT() == MVT::i8) { + SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue); + } + + if (IsSet) { + SDValue Ops[] = {Dst, Size, SrcOrValue, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp}); + return SDValue(Node, 2); + } else { + SDValue Ops[] = {Dst, SrcOrValue, Size, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp, SrcOp}); + return SDValue(Node, 3); + } +} + +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo
DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); + if (STI.hasMOPS()) + return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + return SDValue(); +} + SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); + + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); + } + // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); - const AArch64Subtarget &STI = - DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); const char *bzeroName = (V && V->isZero()) ?
DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) @@ -55,6 +137,19 @@ return SDValue(); } +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget<AArch64Subtarget>(); + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + } + return SDValue(); +} + static const int kSetTagLoopThreshold = 176; static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops | FileCheck %s --check-prefix=CHECK-MOPS + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +declare void @fn(i8*, i8*) + +define void @consecutive() { +; CHECK-MOPS-LABEL: consecutive: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: stp x29, x30, [sp, #-16]!
// 16-byte Folded Spill +; CHECK-MOPS-NEXT: sub sp, sp, #2016 +; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-MOPS-NEXT: .cfi_offset w30, -8 +; CHECK-MOPS-NEXT: .cfi_offset w29, -16 +; CHECK-MOPS-NEXT: mov w8, #1000 +; CHECK-MOPS-NEXT: add x9, sp, #8 +; CHECK-MOPS-NEXT: adrp x10, .LCPI0_0 +; CHECK-MOPS-NEXT: adrp x11, .LCPI0_1 +; CHECK-MOPS-NEXT: mov w12, #6424 +; CHECK-MOPS-NEXT: mov w13, #7452 +; CHECK-MOPS-NEXT: setp [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: setm [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: sete [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: movk w12, #6938, lsl #16 +; CHECK-MOPS-NEXT: ldr q0, [x10, :lo12:.LCPI0_0] +; CHECK-MOPS-NEXT: mov w8, #30 +; CHECK-MOPS-NEXT: ldr d1, [x11, :lo12:.LCPI0_1] +; CHECK-MOPS-NEXT: add x0, sp, #1008 +; CHECK-MOPS-NEXT: add x1, sp, #8 +; CHECK-MOPS-NEXT: str w12, [sp, #1032] +; CHECK-MOPS-NEXT: strh w13, [sp, #1036] +; CHECK-MOPS-NEXT: str q0, [sp, #1008] +; CHECK-MOPS-NEXT: str d1, [sp, #1024] +; CHECK-MOPS-NEXT: strb w8, [sp, #1038] +; CHECK-MOPS-NEXT: bl fn +; CHECK-MOPS-NEXT: add sp, sp, #2016 +; CHECK-MOPS-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-MOPS-NEXT: ret +entry: + %buf_from = alloca [1000 x i8], align 16 + %buf_to = alloca [1000 x i8], align 1 + %0 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 0 + %1 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_to, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(1000) %1, i8 0, i64 1000, i1 false) + %2 = bitcast [1000 x i8]* %buf_from to <16 x i8>* + store <16 x i8> , <16 x i8>* %2, align 16 + %arrayidx.16 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 16 + %3 = bitcast i8* %arrayidx.16 to <8 x i8>* + store <8 x i8> , <8 x i8>* %3, align 16 + %arrayidx.24 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 24 + store i8 24, i8* %arrayidx.24, align 8 + %arrayidx.25 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 25 
+ store i8 25, i8* %arrayidx.25, align 1 + %arrayidx.26 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 26 + store i8 26, i8* %arrayidx.26, align 2 + %arrayidx.27 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 27 + store i8 27, i8* %arrayidx.27, align 1 + %arrayidx.28 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 28 + store i8 28, i8* %arrayidx.28, align 4 + %arrayidx.29 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 29 + store i8 29, i8* %arrayidx.29, align 1 + %arrayidx.30 = getelementptr inbounds [1000 x i8], [1000 x i8]* %buf_from, i64 0, i64 30 + store i8 30, i8* %arrayidx.30, align 2 + call void @fn(i8* nonnull %0, i8* nonnull %1) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll @@ -1,11 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops,+mte | FileCheck %s --check-prefix=SDAG ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel -; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64) define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_0_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov x8, xzr +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_0_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov x8, xzr @@ -19,6 +27,14 @@ } define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_1_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov 
w8, #1 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_1_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov x9, xzr @@ -34,6 +50,14 @@ } define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_10_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov x9, xzr @@ -49,6 +73,14 @@ } define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_10000_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10000 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10000_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov x9, xzr @@ -64,6 +96,13 @@ } define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_size_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: setgp [x0]!, x1!, xzr +; SDAG-NEXT: setgm [x0]!, x1!, xzr +; SDAG-NEXT: setge [x0]!, x1!, xzr +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_size_zeroval: ; GISel: // %bb.0: // %entry ; GISel-NEXT: mov x8, xzr @@ -77,6 +116,15 @@ } define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_0: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov x8, xzr +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_0: ; GISel: // %bb.0: // %entry ; GISel-NEXT: // implicit-def: $x9 @@ -93,6 +141,15 @@ } define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_1: +; SDAG: 
// %bb.0: // %entry +; SDAG-NEXT: mov w8, #1 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_1: ; GISel: // %bb.0: // %entry ; GISel-NEXT: // implicit-def: $x9 @@ -110,6 +167,15 @@ } define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_10: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10: ; GISel: // %bb.0: // %entry ; GISel-NEXT: // implicit-def: $x9 @@ -127,6 +193,15 @@ } define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_10000: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10000 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_10000: ; GISel: // %bb.0: // %entry ; GISel-NEXT: // implicit-def: $x9 @@ -144,6 +219,14 @@ } define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_size: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x1!, x2 +; SDAG-NEXT: setgm [x0]!, x1!, x2 +; SDAG-NEXT: setge [x0]!, x1!, x2 +; SDAG-NEXT: ret +; ; GISel-LABEL: memset_tagged_size: ; GISel: // %bb.0: // %entry ; GISel-NEXT: // implicit-def: $x8 diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -1,21 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 
| FileCheck %s --check-prefix=O2-SDAG-WITHOUT-MOPS +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops | FileCheck %s --check-prefix=O2-SDAG-MOPS + ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefix=O0-GISel-WITHOUT-MOPS ; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops | FileCheck %s --check-prefix=O0-GISel-MOPS -; Function Attrs: argmemonly nofree nounwind willreturn writeonly declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) -; Function Attrs: argmemonly nofree nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) -; Function Attrs: argmemonly nofree nounwind willreturn declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) -; Function Attrs: argmemonly nofree nounwind willreturn declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) define void @memset_0_zeroval(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -34,6 +41,14 @@ } define void @memset_0_zeroval_volatile(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -52,6 +67,18 @@ } define void 
@memset_10_zeroval(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str xzr, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: strh wzr, [x0, #8] +; O2-SDAG-MOPS-NEXT: str xzr, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str xzr, [x0] @@ -73,6 +100,18 @@ } define void @memset_10_zeroval_volatile(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str xzr, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: strh wzr, [x0, #8] +; O2-SDAG-MOPS-NEXT: str xzr, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -100,6 +139,25 @@ } define void @memset_10000_zeroval(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_zeroval: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -127,6 +185,25 @@ } define void @memset_10000_zeroval_volatile(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_zeroval_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -154,6 +231,24 @@ } define void @memset_size_zeroval(i8* %dst, i64 %size) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -178,6 +273,24 @@ } define void @memset_size_zeroval_volatile(i8* %dst, i64 %size) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -201,7 +314,16 @@ ret void } + define void @memset_0(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_0: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -221,6 +343,14 @@ } define void @memset_0_volatile(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -240,6 +370,26 @@ } define void @memset_10(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, #72340172838076673 +; O2-SDAG-WITHOUT-MOPS-NEXT: and x9, x1, #0xff +; 
O2-SDAG-WITHOUT-MOPS-NEXT: mul x8, x9, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: mov x8, #72340172838076673 +; O2-SDAG-MOPS-NEXT: and x9, x1, #0xff +; O2-SDAG-MOPS-NEXT: mul x8, x9, x8 +; O2-SDAG-MOPS-NEXT: str x8, [x0] +; O2-SDAG-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: // implicit-def: $x8 @@ -269,6 +419,26 @@ } define void @memset_10_volatile(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, #72340172838076673 +; O2-SDAG-WITHOUT-MOPS-NEXT: and x9, x1, #0xff +; O2-SDAG-WITHOUT-MOPS-NEXT: mul x8, x9, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: mov x8, #72340172838076673 +; O2-SDAG-MOPS-NEXT: and x9, x1, #0xff +; O2-SDAG-MOPS-NEXT: mul x8, x9, x8 +; O2-SDAG-MOPS-NEXT: str x8, [x0] +; O2-SDAG-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -297,6 +467,25 @@ } define void @memset_10000(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -325,6 +514,25 @@ } define void @memset_10000_volatile(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -353,6 +561,26 @@ } define void @memset_size(i8* %dst, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, w2 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: // kill: def $w2 killed $w2 def $x2 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_size: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: sub sp, sp, #32 @@ -382,6 +610,26 @@ } define void @memset_size_volatile(i8* %dst, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, w2 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: // kill: def $w2 killed $w2 def $x2 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: sub sp, sp, #32 @@ -410,7 +658,16 @@ ret void } + define void @memcpy_0(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_0: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -428,6 +685,14 @@ } define void @memcpy_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_0_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -445,6 +710,22 @@ } define void @memcpy_10(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; 
O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x9, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-MOPS-NEXT: str x9, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_10: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] @@ -467,6 +748,22 @@ } define void @memcpy_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-MOPS-NEXT: str x8, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -492,6 +789,24 @@ } define void @memcpy_1000(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_1000: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_1000: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_1000: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -517,6 +832,24 @@ } define void @memcpy_1000_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_1000_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_1000_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_1000_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -542,6 +875,22 @@ } define void @memcpy_n(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_n: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_n: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_n: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -563,6 +912,22 @@ } define void @memcpy_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_n_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_n_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_n_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -583,7 +948,16 @@ ret void } + define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -597,6 +971,14 @@ } define void @memcpy_inline_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -610,6 +992,22 @@ } define void @memcpy_inline_10(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x9, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-MOPS-NEXT: str x9, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] @@ -631,6 +1029,22 @@ } define void @memcpy_inline_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x8, 
[x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-MOPS-NEXT: str x8, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] @@ -651,7 +1065,16 @@ ret void } + define void @memmove_0(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_0: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -669,6 +1092,14 @@ } define void @memmove_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_0_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ret @@ -686,6 +1117,22 @@ } define void @memmove_10(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x9, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ldrh w8, [x1, #8] +; 
O2-SDAG-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-MOPS-NEXT: str x9, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_10: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: ldr x9, [x1] @@ -708,6 +1155,22 @@ } define void @memmove_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-MOPS-NEXT: str x8, [x0] +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_10_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -733,6 +1196,24 @@ } define void @memmove_1000(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_1000: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_1000: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x8! 
+; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_1000: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -758,6 +1239,24 @@ } define void @memmove_1000_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_1000_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_1000_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_1000_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -783,6 +1282,22 @@ } define void @memmove_n(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_n: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_n: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x2! 
+; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_n: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -804,6 +1319,22 @@ } define void @memmove_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_n_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_n_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; ; O0-GISel-WITHOUT-MOPS-LABEL: memmove_n_volatile: ; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry ; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill