diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -62,6 +62,9 @@ BUILTIN(__builtin_arm_stg, "vv*", "t") BUILTIN(__builtin_arm_subp, "Uiv*v*", "t") +// Memory Operations +BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "") + // Memory barrier BUILTIN(__builtin_arm_dmb, "vUi", "nc") BUILTIN(__builtin_arm_dsb, "vUi", "nc") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9765,6 +9765,18 @@ return Builder.CreateCall(F, {Arg0, Arg1}); } + // Memory Operations (MOPS) + if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) { + Value *Dst = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *Size = EmitScalarExpr(E->getArg(2)); + Dst = Builder.CreatePointerCast(Dst, Int8PtrTy); + Val = Builder.CreateTrunc(Val, Int8Ty); + Size = Builder.CreateIntCast(Size, Int64Ty, false); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size}); + } + // Memory Tagging Extensions (MTE) Intrinsics Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -730,6 +730,12 @@ #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) #endif +/* Memory Operations Intrinsics */ +#if __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING +#define __arm_mops_memset_tag(tagged_address, value, size) \ + __builtin_arm_mops_memset_tag(tagged_address, value, size) +#endif + /* Transactional Memory Extension (TME) Intrinsics */ #if __ARM_FEATURE_TME diff --git a/clang/test/CodeGen/aarch64-mops.c b/clang/test/CodeGen/aarch64-mops.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-mops.c @@ -0,0 +1,152 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -O0 -S -emit-llvm -o - %s | FileCheck %s + +#define __ARM_FEATURE_MOPS 1 +#include +#include + +// CHECK-LABEL: @bzero_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_0(void *dst) { + return __arm_mops_memset_tag(dst, 0, 0); +} + +// CHECK-LABEL: @bzero_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_1(void *dst) { + return __arm_mops_memset_tag(dst, 0, 1); +} + +// CHECK-LABEL: @bzero_10( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10) 
+// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_10(void *dst) { + return __arm_mops_memset_tag(dst, 0, 10); +} + +// CHECK-LABEL: @bzero_10000( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_10000(void *dst) { + return __arm_mops_memset_tag(dst, 0, 10000); +} + +// CHECK-LABEL: @bzero_n( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]]) +// CHECK-NEXT: ret i8* [[TMP2]] +// +void *bzero_n(void *dst, size_t size) { + return __arm_mops_memset_tag(dst, 0, size); +} + +// CHECK-LABEL: @memset_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_0(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 0); +} + +// CHECK-LABEL: @memset_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_1(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 1); +} + +// CHECK-LABEL: @memset_10( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_10(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 10); +} + +// CHECK-LABEL: @memset_10000( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: 
[[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_10000(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 10000); +} + +// CHECK-LABEL: @memset_n( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]]) +// CHECK-NEXT: ret i8* [[TMP4]] +// +void *memset_n(void *dst, int value, size_t size) { + return __arm_mops_memset_tag(dst, value, size); +} diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -432,16 +432,6 @@ return TypeIdx; } - unsigned immIdx(unsigned ImmIdx) { - assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - - MCOI::OPERAND_FIRST_GENERIC_IMM) && - "Imm Index is out of bounds"); -#ifndef NDEBUG - ImmIdxsCovered.set(ImmIdx); -#endif - return ImmIdx; - } - void markAllIdxsAsCovered() { #ifndef NDEBUG TypeIdxsCovered.set(); @@ -568,6 +558,16 @@ } unsigned getAlias() const { return AliasOf; } + unsigned immIdx(unsigned ImmIdx) { + assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM) && + "Imm Index is out of bounds"); +#ifndef NDEBUG + ImmIdxsCovered.set(ImmIdx); +#endif + return ImmIdx; + } + /// The instruction is legal if predicate is true. LegalizeRuleSet &legalIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that the free-form @@ -824,11 +824,22 @@ LegalizeRuleSet &customForCartesianProduct(std::initializer_list Types) { return actionForCartesianProduct(LegalizeAction::Custom, Types); } + /// The instruction is custom when type indexes 0 and 1 are both in their + /// respective lists. LegalizeRuleSet & customForCartesianProduct(std::initializer_list Types0, std::initializer_list Types1) { return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1); } + /// The instruction is custom when when type indexes 0, 1, and 2 are all in + /// their respective lists. + LegalizeRuleSet & + customForCartesianProduct(std::initializer_list Types0, + std::initializer_list Types1, + std::initializer_list Types2) { + return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1, + Types2); + } /// Unconditionally custom lower. 
LegalizeRuleSet &custom() { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -890,6 +890,14 @@ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; } +//===----------------------------------------------------------------------===// +// Memory Operations (MOPS) Intrinsics +let TargetPrefix = "aarch64" in { + // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64 + def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; +} + // Transactional Memory Extension (TME) Intrinsics let TargetPrefix = "aarch64" in { def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" @@ -88,6 +89,8 @@ MachineBasicBlock::iterator MBBI); bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandMOPS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + unsigned Opc); }; } // end anonymous namespace @@ -807,6 +810,51 @@ return true; } +bool AArch64ExpandPseudo::expandMOPS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Pseudo) { + auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>(); + assert(STI.hasMOPS()); + assert(STI.hasMTE() || Pseudo != AArch64::MOPSMemorySetTagging); + + const auto Ops = [Pseudo]() -> std::array<unsigned, 3> { + if (Pseudo == AArch64::MOPSMemoryCopy) + return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE}; + if (Pseudo == AArch64::MOPSMemoryMove) + return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE}; + if (Pseudo == AArch64::MOPSMemorySet) + return {AArch64::SETP, AArch64::SETM, AArch64::SETE}; + if (Pseudo == AArch64::MOPSMemorySetTagging) + return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE}; + llvm_unreachable("Unhandled memory operation pseudo"); + }(); + const bool IsSet = Pseudo == AArch64::MOPSMemorySet || + Pseudo == AArch64::MOPSMemorySetTagging; + + // MOPS requires consecutive instructions in its sequences, so pack them + // inside a bundle to prevent other passes from moving things in between. + MIBundleBuilder Bundler(MBB, MBBI); + auto &MF = *MBB.getParent(); + for (auto Op : Ops) { + auto B = BuildMI(MF, MBBI->getDebugLoc(), TII->get(Op)); + int i = 0; + // Destination registers + B.addDef(MBBI->getOperand(i++).getReg()); + B.addDef(MBBI->getOperand(i++).getReg()); + if (!IsSet) + B.addDef(MBBI->getOperand(i++).getReg()); + // Input registers + B.addUse(MBBI->getOperand(i++).getReg()); + B.addUse(MBBI->getOperand(i++).getReg()); + B.addUse(MBBI->getOperand(i++).getReg()); + Bundler.append(B); + } + finalizeBundle(MBB, Bundler.begin(), Bundler.end()); + + MBBI->eraseFromParent(); + return true; +} + /// If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, @@ -1231,6 +1279,11 @@ return expandCALL_RVMARKER(MBB, MBBI); case AArch64::StoreSwiftAsyncContext: return expandStoreSwiftAsyncContext(MBB, MBBI); + case AArch64::MOPSMemoryCopy: + case AArch64::MOPSMemoryMove: + case AArch64::MOPSMemorySet: + case AArch64::MOPSMemorySetTagging: + return expandMOPS(MBB, MBBI, Opcode); } return false; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -450,6 +450,12 @@ LDP, STP, STNP, + + // Memory Operations + MOPS_MEMSET, + MOPS_MEMSET_TAGGING, + MOPS_MEMCOPY, + MOPS_MEMMOVE, }; } // end namespace AArch64ISD @@ -887,6 +893,7 @@ SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -936,17 +936,28 @@ setTargetDAGCombine(ISD::GlobalAddress); - // In case of strict alignment, avoid an excessive number of byte wide stores. - MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemset = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemsetOptSize : 32; + if (Subtarget->hasMOPS()) { + // If we have MOPS, always use them + MaxStoresPerMemsetOptSize = 0; + MaxStoresPerMemset = 0; + MaxGluedStoresPerMemcpy = 0; + MaxStoresPerMemcpyOptSize = 0; + MaxStoresPerMemcpy = 0; + MaxStoresPerMemmoveOptSize = 0; + MaxStoresPerMemmove = 0; + } else { + // In case of strict alignment, avoid an excessive number of byte wide stores. + MaxStoresPerMemsetOptSize = 8; + MaxStoresPerMemset = Subtarget->requiresStrictAlign() + ? MaxStoresPerMemsetOptSize : 32; - MaxGluedStoresPerMemcpy = 4; - MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() - ? MaxStoresPerMemcpyOptSize : 16; + MaxGluedStoresPerMemcpy = 4; + MaxStoresPerMemcpyOptSize = 4; + MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() + ? MaxStoresPerMemcpyOptSize : 16; - MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + } MaxLoadsPerMemcmpOptSize = 4; MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() @@ -1423,6 +1434,11 @@ setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); } + if (Subtarget->hasMOPS() && Subtarget->hasMTE()) { + // Only required for llvm.aarch64.mops.memset.tag + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + } + PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } @@ -2240,6 +2256,10 @@ MAKE_CASE(AArch64ISD::UADDLP) MAKE_CASE(AArch64ISD::CALL_RVMARKER) MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL) + MAKE_CASE(AArch64ISD::MOPS_MEMSET) + MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING) + MAKE_CASE(AArch64ISD::MOPS_MEMCOPY) + MAKE_CASE(AArch64ISD::MOPS_MEMMOVE) } #undef MAKE_CASE return nullptr; @@ -4028,6 +4048,39 @@ return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret); } +SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. 
+ case Intrinsic::aarch64_mops_memset_tag: { + auto Node = cast(Op.getNode()); + SDLoc DL(Op); + SDValue Chain = Node->getChain(); + SDValue Dst = Op.getOperand(2); + SDValue Val = Op.getOperand(3); + Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64); + SDValue Size = Op.getOperand(4); + auto Alignment = Node->getMemOperand()->getAlign(); + bool IsVol = Node->isVolatile(); + auto DstPtrInfo = Node->getPointerInfo(); + + const auto &SDI = + static_cast(DAG.getSelectionDAGInfo()); + SDValue MS = + SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val, + Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{}); + + // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the + // intrinsic has 2. So hide SizeWb it using MERGE_VALUES. Otherwise + // LowerOperationWrapper will complain that the number of results has + // changed. + return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL); + } + } +} + SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); @@ -5095,6 +5148,8 @@ case ISD::MULHU: return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED, /*OverrideNEON=*/true); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::ATOMIC_STORE: @@ -11812,6 +11867,18 @@ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal; return true; } + case Intrinsic::aarch64_mops_memset_tag: { + Value *Dst = I.getArgOperand(0); + Value *Val = I.getArgOperand(1); + PointerType *PtrTy = cast(Dst->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(Val->getType()); + Info.ptrVal = Dst; + Info.offset = 0; + Info.align = DL.getABITypeAlign(PtrTy->getElementType()); + Info.flags = MachineMemOperand::MOStore; + return true; + } default: break; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8337,6 +8337,35 @@ defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; } +// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain +// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain +def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; +def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; +def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; +def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; +def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; + +let Predicates = [HasMOPS], mayStore = 1 in { + let mayLoad = 1 in { + def MOPSMemoryCopy : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), + [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; + def MOPSMemoryMove : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), + [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; + } + let mayLoad = 0 in { + def MOPSMemorySet : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; + } +} +let Predicates = [HasMOPS, HasMTE], mayLoad = 0, mayStore = 1 in { + def MOPSMemorySetTagging : Pseudo<(outs 
GPR64common:$Rd_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; +} + let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in def StoreSwiftAsyncContext : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -19,11 +19,30 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { public: + SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, + const SDLoc &DL, SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, Align Alignment, + bool isVolatile, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, MachinePointerInfo DstPtrInfo, diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -15,15 +15,102 @@ #define DEBUG_TYPE "aarch64-selectiondag-info" +SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, + SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, + SDValue SrcOrValue, SDValue Size, + Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + + // Get the constant size of the copy/set. We don't use it. 
+ uint64_t ConstSize = 0; + if (auto *C = dyn_cast(Size)) { + ConstSize = cast(Size)->getZExtValue(); + } + + const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET || + SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING; + + const auto MachineOpcode = [&]() { + switch (SDOpcode) { + case AArch64ISD::MOPS_MEMSET: + return AArch64::MOPSMemorySet; + case AArch64ISD::MOPS_MEMSET_TAGGING: + return AArch64::MOPSMemorySetTagging; + case AArch64ISD::MOPS_MEMCOPY: + return AArch64::MOPSMemoryCopy; + case AArch64ISD::MOPS_MEMMOVE: + return AArch64::MOPSMemoryMove; + default: + break; + } + llvm_unreachable_internal("Unhandled MOPS ISD Opcode"); + return AArch64::INSTRUCTION_LIST_END; + }(); + + MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; + // if (!Temporal) + // Flags |= MachineMemOperand::MONonTemporal; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (!IsSet) + Flags |= MachineMemOperand::MOLoad; + + MachineFunction &MF = DAG.getMachineFunction(); + + auto *DstOp = + MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment); + auto *SrcOp = + MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment); + + // Extend i8 value to i64 if required + if (SrcOrValue.getValueType().getSimpleVT() == MVT::i8) { + SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue); + } + + if (IsSet) { + SDValue Ops[] = {Dst, Size, SrcOrValue, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp}); + return SDValue(Node, 2); + } else { + SDValue Ops[] = {Dst, SrcOrValue, Size, Chain}; + const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other}; + MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops); + DAG.setNodeMemRefs(Node, {DstOp, SrcOp}); + return SDValue(Node, 3); + } +} + +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + if (STI.hasMOPS()) + return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + return SDValue(); +} + SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); + } + // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); ConstantSDNode *SizeValue = dyn_cast(Size); - const AArch64Subtarget &STI = - DAG.getMachineFunction().getSubtarget(); const char *bzeroName = (V && V->isZero()) ? 
DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) @@ -55,6 +142,19 @@ return SDValue(); } +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + if (STI.hasMOPS()) { + return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size, + Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + } + return SDValue(); +} + static const int kSetTagLoopThreshold = 176; static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -193,6 +193,7 @@ bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); + bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); unsigned emitConstantPoolEntry(const Constant *CPVal, @@ -3425,6 +3426,13 @@ case TargetOpcode::G_VECREDUCE_FADD: case TargetOpcode::G_VECREDUCE_ADD: return selectReduction(I, MRI); + case TargetOpcode::G_BZERO: + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMCPY_INLINE: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + assert(STI.hasMOPS() && "Shouldn't get here without +mops feature"); + return selectMOPS(I, MRI); } return false; @@ -3482,6 +3490,68 @@ return false; } +bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, + MachineRegisterInfo &MRI) { + assert(GI.getOpcode() != TargetOpcode::G_BZERO && + "There is no point combining to G_BZERO only to re-materialize the " + "zero."); + + unsigned Mopcode; + switch (GI.getOpcode()) { + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMCPY_INLINE: + Mopcode = AArch64::MOPSMemoryCopy; + break; + case TargetOpcode::G_MEMMOVE: + Mopcode = AArch64::MOPSMemoryMove; + break; + case TargetOpcode::G_MEMSET: + // For tagged memset see llvm.aarch64.mops.memset.tag + Mopcode = AArch64::MOPSMemorySet; + break; + } + + auto &DstPtr = GI.getOperand(0); + auto &SrcOrVal = GI.getOperand(1); + auto &Size = GI.getOperand(2); + + // Create copies of the registers that can be clobbered. + const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg()); + const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg()); + const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg()); + + const bool IsSet = Mopcode == AArch64::MOPSMemorySet; + const auto &SrcValRegClass = + IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; + + // Constrain to specific registers + RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); + RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI); + RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); + + MIB.buildCopy(DstPtrCopy, DstPtr); + MIB.buildCopy(SrcValCopy, SrcOrVal); + MIB.buildCopy(SizeCopy, Size); + + // New instruction uses the copied registers because it must update them. + // The defs are not used since they don't exist in G_MEM*. They are still + // tied. 
+ // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE + Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); + Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + if (IsSet) { + MIB.buildInstr(Mopcode, {DefDstPtr, DefSize}, + {DstPtrCopy, SizeCopy, SrcValCopy}); + } else { + Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); + MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize}, + {DstPtrCopy, SrcValCopy, SizeCopy}); + } + + GI.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) { assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); @@ -5376,6 +5446,36 @@ constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); break; } + case Intrinsic::aarch64_mops_memset_tag: { + // Transform + // %dst:gpr(p0) = \ + // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), + // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) + // where %dst is updated, into + // %Rd:GPR64common, %Rn:GPR64 = \ + // MOPSMemorySetTagging \ + // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 + // where Rd and Rn are tied. + // It is expected that %val has been extended to s64 in legalization. + // Note that the order of the size/value operands is swapped. + + Register DstDef = I.getOperand(0).getReg(); + // I.getOperand(1) is the intrinsic function + Register DstUse = I.getOperand(2).getReg(); + Register ValUse = I.getOperand(3).getReg(); + Register SizeUse = I.getOperand(4).getReg(); + + // MOPSMemorySetTagging has two defs; the intrinsic call has only one. + // Therefore an additional virtual register is required for the updated size + // operand. This value is not accessible via the semantics of the intrinsic. + Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64)); + + auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTagging, + {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); + Memset.cloneMemRefs(I); + constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); + break; + } } I.eraseFromParent(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -56,6 +56,7 @@ bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -699,8 +699,28 @@ getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); - getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) - .libcall(); + if (ST.hasMOPS()) { + // G_BZERO is not supported. Currently it is only emitted by + // PreLegalizerCombiner for G_MEMSET with zero constant. + getActionDefinitionsBuilder(G_BZERO).unsupported(); + + getActionDefinitionsBuilder(G_MEMSET) + .legalForCartesianProduct({p0}, {s64}, {s64}) + .customForCartesianProduct({p0}, {s8}, {s64}) + .immIdx(0); // Inform verifier imm idx 0 is handled.
+ + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE}) + .legalForCartesianProduct({p0}, {p0}, {s64}) + .immIdx(0); // Inform verifier imm idx 0 is handled. + + // G_MEMCPY_INLINE does not have a tailcall immediate + getActionDefinitionsBuilder(G_MEMCPY_INLINE) + .legalForCartesianProduct({p0}, {p0}, {s64}); + + } else { + getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) + .libcall(); + } // FIXME: Legal types are only legal with NEON. getActionDefinitionsBuilder(G_ABS) @@ -832,6 +852,11 @@ return legalizeAtomicCmpxchg128(MI, MRI, Helper); case TargetOpcode::G_CTTZ: return legalizeCTTZ(MI, Helper); + case TargetOpcode::G_BZERO: + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + return legalizeMemOps(MI, Helper); } llvm_unreachable("expected switch to return"); @@ -989,6 +1014,15 @@ MI.eraseFromParent(); return true; } + case Intrinsic::aarch64_mops_memset_tag: { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + // Zext the value to 64 bit + MachineIRBuilder MIB(MI); + auto &Value = MI.getOperand(3); + Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0); + Value.setReg(ZExtValueReg); + return true; + } } return true; @@ -1359,3 +1393,20 @@ MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + + // The tagged version MOPSMemorySetTagging is legalised in legalizeIntrinsic + if (MI.getOpcode() == TargetOpcode::G_MEMSET) { + // Zext the value operand to 64 bit + auto &Value = MI.getOperand(1); + Register ZExtValueReg = + MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0); + Value.setReg(ZExtValueReg); + return true; + } + + return false; +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -75,6 +75,7 @@ MachineIRBuilder &B) const { CombinerHelper Helper(Observer, B, KB, MDT); AArch64GenO0PreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper); + auto &ST = static_cast<const AArch64Subtarget &>(B.getMF().getSubtarget()); if (Generated.tryCombineAll(Observer, MI, B)) return true; @@ -86,10 +87,15 @@ case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); case TargetOpcode::G_MEMCPY_INLINE: + if (ST.hasMOPS()) + return false; return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { + if (ST.hasMOPS()) + return false; + // At -O0 set a maxlen of 32 to inline; unsigned MaxLen = 32; // Try to inline memcpy type calls if optimizations are enabled.
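For reference, a minimal sketch of how the new ACLE intrinsic is expected to be used from C. This is hypothetical user code, not part of the patch: the function and parameter names are invented, and it assumes a compile in which both __ARM_FEATURE_MOPS and __ARM_FEATURE_MEMORY_TAGGING are defined (the Clang test above defines __ARM_FEATURE_MOPS manually and passes -target-feature +mops -target-feature +mte), so that arm_acle.h exposes the __arm_mops_memset_tag macro added above.

#include <arm_acle.h>
#include <stddef.h>

/* Zero-fill a buffer and update its MTE allocation tags in one
   SETGP/SETGM/SETGE sequence; the intrinsic's result is returned
   unchanged, mirroring the Clang CodeGen tests above. */
void *zero_and_retag(void *tagged_dst, size_t len) {
  return __arm_mops_memset_tag(tagged_dst, 0, len);
}

Plain (untagged) memset/memcpy/memmove calls need no intrinsic: with +mops, the lowering changes above select SETP/SETM/SETE and CPY*/CPYF* sequences directly, as the llc tests below check.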
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops | FileCheck %s --check-prefix=CHECK-MOPS + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +declare dso_local void @fn(i8*, i8*) local_unnamed_addr + +define hidden void @consecutive() local_unnamed_addr { +; CHECK-MOPS-LABEL: consecutive: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: sub sp, sp, #80 +; CHECK-MOPS-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 80 +; CHECK-MOPS-NEXT: .cfi_offset w30, -16 +; CHECK-MOPS-NEXT: adrp x10, .LCPI0_0 +; CHECK-MOPS-NEXT: adrp x11, .LCPI0_1 +; CHECK-MOPS-NEXT: mov w8, #31 +; CHECK-MOPS-NEXT: mov x9, sp +; CHECK-MOPS-NEXT: mov w12, #6424 +; CHECK-MOPS-NEXT: setp [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: setm [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: sete [x9]!, x8!, xzr +; CHECK-MOPS-NEXT: movk w12, #6938, lsl #16 +; CHECK-MOPS-NEXT: mov w13, #7452 +; CHECK-MOPS-NEXT: ldr q0, [x10, :lo12:.LCPI0_0] +; CHECK-MOPS-NEXT: mov w8, #30 +; CHECK-MOPS-NEXT: ldr d1, [x11, :lo12:.LCPI0_1] +; CHECK-MOPS-NEXT: add x0, sp, #32 +; CHECK-MOPS-NEXT: mov x1, sp +; CHECK-MOPS-NEXT: str w12, [sp, #56] +; CHECK-MOPS-NEXT: strh w13, [sp, #60] +; CHECK-MOPS-NEXT: str q0, [sp, #32] +; CHECK-MOPS-NEXT: str d1, [sp, #48] +; CHECK-MOPS-NEXT: strb w8, [sp, #62] +; CHECK-MOPS-NEXT: bl fn +; CHECK-MOPS-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-MOPS-NEXT: add sp, sp, #80 +; CHECK-MOPS-NEXT: ret +entry: + %buf_from = alloca [31 x i8], align 16 + %buf_to = alloca [31 x i8], align 1 + %0 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 0 + %1 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_to, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(31) %1, i8 0, i64 31, i1 false) + %2 = bitcast [31 x i8]* %buf_from to <16 x i8>* + store <16 x i8> , <16 x i8>* %2, align 16 + %arrayidx.16 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 16 + %3 = bitcast i8* %arrayidx.16 to <8 x i8>* + store <8 x i8> , <8 x i8>* %3, align 16 + %arrayidx.24 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 24 + store i8 24, i8* %arrayidx.24, align 8 + %arrayidx.25 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 25 + store i8 25, i8* %arrayidx.25, align 1 + %arrayidx.26 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 26 + store i8 26, i8* %arrayidx.26, align 2 + %arrayidx.27 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 27 + store i8 27, i8* %arrayidx.27, align 1 + %arrayidx.28 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 28 + store i8 28, i8* %arrayidx.28, align 4 + %arrayidx.29 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 29 + store i8 29, i8* %arrayidx.29, align 1 + %arrayidx.30 = getelementptr inbounds [31 x i8], [31 x i8]* %buf_from, i64 0, i64 30 + store i8 30, i8* %arrayidx.30, align 2 + call void @fn(i8* nonnull %0, i8* nonnull %1) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll 
new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops,+mte | FileCheck %s --check-prefix=SDAG +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel + +; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly +declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64) + +define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_0_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov x8, xzr +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_0_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x8, xzr +; GISel-NEXT: setgp [x0]!, x8!, x8 +; GISel-NEXT: setgm [x0]!, x8!, x8 +; GISel-NEXT: setge [x0]!, x8!, x8 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 0) + ret i8* %r +} + +define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_1_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #1 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_1_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x9, xzr +; GISel-NEXT: mov w8, #1 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 1) + ret i8* %r +} + +define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_10_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_10_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x9, xzr +; GISel-NEXT: mov w8, #10 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10) + ret i8* %r +} + +define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_10000_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10000 +; SDAG-NEXT: setgp [x0]!, x8!, xzr +; SDAG-NEXT: setgm [x0]!, x8!, xzr +; SDAG-NEXT: setge [x0]!, x8!, xzr +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_10000_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x9, xzr +; GISel-NEXT: mov w8, #10000 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10000) + ret i8* %r +} + +define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) { +; SDAG-LABEL: memset_tagged_size_zeroval: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: setgp [x0]!, x1!, xzr +; SDAG-NEXT: setgm [x0]!, x1!, xzr +; SDAG-NEXT: setge [x0]!, x1!, xzr +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_size_zeroval: 
+; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x8, xzr +; GISel-NEXT: setgp [x0]!, x1!, x8 +; GISel-NEXT: setgm [x0]!, x1!, x8 +; GISel-NEXT: setge [x0]!, x1!, x8 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 %size) + ret i8* %r +} + +define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_0: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov x8, xzr +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_0: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov x8, xzr +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 0) + ret i8* %r +} + +define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_1: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #1 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_1: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov w8, #1 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 1) + ret i8* %r +} + +define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_10: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_10: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov w8, #10 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10) + ret i8* %r +} + +define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: memset_tagged_10000: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mov w8, #10000 +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x8!, x2 +; SDAG-NEXT: setgm [x0]!, x8!, x2 +; SDAG-NEXT: setge [x0]!, x8!, x2 +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_10000: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov w8, #10000 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10000) + ret i8* %r +} + +define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) { +; SDAG-LABEL: 
memset_tagged_size: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: // kill: def $w2 killed $w2 def $x2 +; SDAG-NEXT: setgp [x0]!, x1!, x2 +; SDAG-NEXT: setgm [x0]!, x1!, x2 +; SDAG-NEXT: setge [x0]!, x1!, x2 +; SDAG-NEXT: ret +; +; GISel-LABEL: memset_tagged_size: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x8 +; GISel-NEXT: mov w8, w2 +; GISel-NEXT: setgp [x0]!, x1!, x8 +; GISel-NEXT: setgm [x0]!, x1!, x8 +; GISel-NEXT: setge [x0]!, x1!, x8 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 %size) + ret i8* %r +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -0,0 +1,1373 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 | FileCheck %s --check-prefix=O2-SDAG-WITHOUT-MOPS +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O2 -mattr=+mops | FileCheck %s --check-prefix=O2-SDAG-MOPS + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefix=O0-GISel-WITHOUT-MOPS +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops | FileCheck %s --check-prefix=O0-GISel-MOPS + +; Function Attrs: argmemonly nofree nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + + +define void @memset_0_zeroval(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false) + ret void +} + +define void @memset_0_zeroval_volatile(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 
+; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true) + ret void +} + +define void @memset_10_zeroval(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str xzr, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str xzr, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 false) + ret void +} + +define void @memset_10_zeroval_volatile(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str xzr, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 true) + ret void +} + +define void @memset_10000_zeroval(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 false) + ret void +} + +define void @memset_10000_zeroval_volatile(i8* %dst) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, xzr +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 true) + ret void +} + +define void @memset_size_zeroval(i8* %dst, i64 %size) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size_zeroval: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size_zeroval: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov x2, x1 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 false) + ret void +} + +define void @memset_size_zeroval_volatile(i8* %dst, i64 %size) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size_zeroval_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, wzr +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size_zeroval_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, xzr +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov x2, x1 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 true) + ret void +} + + +define void @memset_0(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false) + ret void +} + +define void @memset_0_volatile(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true) + ret void +} + +define void @memset_10(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, #72340172838076673 +; O2-SDAG-WITHOUT-MOPS-NEXT: and x9, x1, #0xff +; 
O2-SDAG-WITHOUT-MOPS-NEXT: mul x8, x9, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: // implicit-def: $x8 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, w1 +; O0-GISel-WITHOUT-MOPS-NEXT: and x8, x8, #0xff +; O0-GISel-WITHOUT-MOPS-NEXT: mov x9, #72340172838076673 +; O0-GISel-WITHOUT-MOPS-NEXT: mul x8, x8, x9 +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: // kill: def $w8 killed $w8 killed $x8 +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 false) + ret void +} + +define void @memset_10_volatile(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, #72340172838076673 +; O2-SDAG-WITHOUT-MOPS-NEXT: and x9, x1, #0xff +; O2-SDAG-WITHOUT-MOPS-NEXT: mul x8, x9, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 true) + ret void +} + +define void @memset_10000(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 false) + ret void +} + +define void @memset_10000_volatile(i8* %dst, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_10000_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #10000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_10000_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10000 +; O2-SDAG-MOPS-NEXT: // kill: def $w1 killed $w1 def $x1 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x8!, x1 +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 true) + ret void +} + +define void @memset_size(i8* %dst, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, w2 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: // kill: def $w2 killed $w2 def $x2 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: sub sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 32 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, w2 +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: add sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x8 +; O0-GISel-MOPS-NEXT: mov w8, w2 +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 false) + ret void +} + +define void @memset_size_volatile(i8* %dst, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memset_size_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x8, x1 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w1, w2 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov x2, x8 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memset +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memset_size_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: // kill: def $w2 killed $w2 def $x2 +; O2-SDAG-MOPS-NEXT: setp [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: setm [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: sete [x0]!, x1!, x2 +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: sub sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 32 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, w2 +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: add sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x8 +; O0-GISel-MOPS-NEXT: mov w8, w2 +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 true) + ret void +} + + +define void @memcpy_0(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) + ret void +} + +define void @memcpy_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) + ret void +} + +define void @memcpy_10(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x9, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) + ret void +} + +define void @memcpy_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) + ret void +} + +define void @memcpy_1000(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_1000: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_1000: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_1000: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_1000: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false) + ret void +} + +define void @memcpy_1000_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_1000_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_1000_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_1000_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_1000_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true) + ret void +} + +define void @memcpy_n(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_n: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_n: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_n: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_n: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false) + ret void +} + +define void @memcpy_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_n_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memcpy +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_n_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_n_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_n_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true) + ret void +} + + +define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) + ret void +} + +define void @memcpy_inline_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) + ret void +} + +define void @memcpy_inline_10(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x9, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) + ret void +} + +define void @memcpy_inline_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memcpy_inline_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memcpy_inline_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) + ret void +} + + +define void @memmove_0(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_0: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_0: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) + ret void +} + +define void @memmove_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_0_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_0_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) + ret void +} + +define void @memmove_10(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_10: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x9, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_10: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: str x9, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) + ret void +} + +define void @memmove_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_10_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O2-SDAG-WITHOUT-MOPS-NEXT: ldrh w9, [x1, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: strh w9, [x0, #8] +; O2-SDAG-WITHOUT-MOPS-NEXT: str x8, [x0] +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_10_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #10 +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) + ret void +} + +define void @memmove_1000(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_1000: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_1000: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_1000: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_1000: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false) + ret void +} + +define void @memmove_1000_volatile(i8* %dst, i8* %src, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_1000_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: mov w2, #1000 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_1000_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: mov w8, #1000 +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_1000_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_1000_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true) + ret void +} + +define void @memmove_n(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_n: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_n: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_n: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_n: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false) + ret void +} + +define void @memmove_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O2-SDAG-WITHOUT-MOPS-LABEL: memmove_n_volatile: +; O2-SDAG-WITHOUT-MOPS: // %bb.0: // %entry +; O2-SDAG-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O2-SDAG-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O2-SDAG-WITHOUT-MOPS-NEXT: bl memmove +; O2-SDAG-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O2-SDAG-WITHOUT-MOPS-NEXT: ret +; +; O2-SDAG-MOPS-LABEL: memmove_n_volatile: +; O2-SDAG-MOPS: // %bb.0: // %entry +; O2-SDAG-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: cpye [x0]!, [x1]!, x2! +; O2-SDAG-MOPS-NEXT: ret +; +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_n_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_n_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true) + ret void +}
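+
+; With +mops, memset lowers to the SETP/SETM/SETE sequence, memcpy and
+; memcpy.inline to CPYFP/CPYFM/CPYFE, and memmove to CPYP/CPYM/CPYE; without
+; it, the generic inline expansion or a libcall is used, as checked above.
+;
+; Declarations for the intrinsics exercised above, using their standard
+; signatures; they are assumed here so the test parses standalone, in case
+; they are not already declared at the end of the file.
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)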