diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -20355,7 +20355,9 @@ In the most general case call to the '``llvm.memcpy.element.unordered.atomic.*``' is lowered to a call to the symbol ``__llvm_memcpy_element_unordered_atomic_*``. Where '*' -is replaced with an actual element size. +is replaced with an actual element size. See :ref:`RewriteStatepointsForGC intrinsic +lowering <RewriteStatepointsForGC_intrinsic_lowering>` for details on GC specific +lowering. Optimizer is allowed to inline memory copy when it's profitable to do so. @@ -20432,7 +20434,9 @@ In the most general case call to the '``llvm.memmove.element.unordered.atomic.*``' is lowered to a call to the symbol ``__llvm_memmove_element_unordered_atomic_*``. Where '*' is replaced with an -actual element size. +actual element size. See :ref:`RewriteStatepointsForGC intrinsic lowering +<RewriteStatepointsForGC_intrinsic_lowering>` for details on GC specific +lowering. The optimizer is allowed to inline the memory copy when it's profitable to do so. diff --git a/llvm/docs/Statepoints.rst b/llvm/docs/Statepoints.rst --- a/llvm/docs/Statepoints.rst +++ b/llvm/docs/Statepoints.rst @@ -817,6 +817,50 @@ pipeline, after most optimization is already done. This helps to improve the quality of the generated code when compiled with garbage collection support. +.. _RewriteStatepointsForGC_intrinsic_lowering: + +RewriteStatepointsForGC intrinsic lowering +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As a part of lowering to the explicit model of relocations +RewriteStatepointsForGC performs GC specific lowering for +'``llvm.memcpy.element.unordered.atomic.*``', +'``llvm.memmove.element.unordered.atomic.*``' intrinsics. + +There are two possible lowerings for these copy operations: GC leaf lowering +and GC parseable lowering. If a call is explicitly marked with +"gc-leaf-function" attribute the call is lowered to a GC leaf call to +'``__llvm_memcpy_element_unordered_atomic_*``' or +'``__llvm_memmove_element_unordered_atomic_*``' symbol. 
Such a call can not +take a safepoint. Otherwise, the call is made GC parseable by wrapping the +call into a statepoint. This makes it possible to take a safepoint during +copy operation. Note that a GC parseable copy operation is not required to +take a safepoint. For example, a short copy operation may be performed without +taking a safepoint. + +GC parseable calls to '``llvm.memcpy.element.unordered.atomic.*``', +'``llvm.memmove.element.unordered.atomic.*``' intrinsics are lowered to calls +to '``__llvm_memcpy_element_unordered_atomic_safepoint_*``', +'``__llvm_memmove_element_unordered_atomic_safepoint_*``' symbols respectively. +This way the runtime can provide implementations of copy operations with and +without safepoints. + +GC parseable lowering also involves adjusting the arguments for the call. +Memcpy and memmove intrinsics take derived pointers as source and destination +arguments. If a copy operation takes a safepoint it might need to relocate the +underlying source and destination objects. This requires the corresponding base +pointers to be available in the copy operation. In order to make the base +pointers available RewriteStatepointsForGC replaces derived pointers with base +pointer and offset pairs. For example: + +.. code-block:: llvm + + declare void @__llvm_memcpy_element_unordered_atomic_safepoint_1( + i8 addrspace(1)* %dest_base, i64 %dest_offset, + i8 addrspace(1)* %src_base, i64 %src_offset, + i64 %length) + + .. 
_PlaceSafepoints: PlaceSafepoints diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1543,6 +1543,101 @@ .getCallee(); IsDeoptimize = true; + } else if (IID == Intrinsic::memcpy_element_unordered_atomic || + IID == Intrinsic::memmove_element_unordered_atomic) { + // Unordered atomic memcpy and memmove intrinsics which are not explicitly + // marked as "gc-leaf-function" should be lowered in a GC parseable way. + // Specifically, these calls should be lowered to the + // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols. + // Similarly to __llvm_deoptimize we want to resolve this now, since the + // verifier does not allow taking the address of an intrinsic function. + // + // Moreover we need to shuffle the arguments for the call in order to + // accommodate GC. The underlying source and destination objects might be + // relocated during copy operation should the GC occur. To relocate the + // derived source and destination pointers the implementation of the + // intrinsic should know the corresponding base pointers. + // + // To make the base pointers available pass them explicitly as arguments: + // memcpy(dest_derived, source_derived, ...) => + // memcpy(dest_base, dest_offset, source_base, source_offset, ...) 
+ auto &Context = Call->getContext(); + auto &DL = Call->getModule()->getDataLayout(); + auto GetBaseAndOffset = [&](Value *Derived) { + assert(Result.PointerToBase.count(Derived)); + unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); + unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); + Value *Base = Result.PointerToBase.find(Derived)->second; + Value *Base_int = Builder.CreatePtrToInt( + Base, Type::getIntNTy(Context, IntPtrSize)); + Value *Derived_int = Builder.CreatePtrToInt( + Derived, Type::getIntNTy(Context, IntPtrSize)); + return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int)); + }; + + auto *Dest = CallArgs[0]; + Value *DestBase, *DestOffset; + std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest); + + auto *Source = CallArgs[1]; + Value *SourceBase, *SourceOffset; + std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source); + + auto *LengthInBytes = CallArgs[2]; + auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]); + + CallArgs.clear(); + CallArgs.push_back(DestBase); + CallArgs.push_back(DestOffset); + CallArgs.push_back(SourceBase); + CallArgs.push_back(SourceOffset); + CallArgs.push_back(LengthInBytes); + + SmallVector<Type *, 8> DomainTy; + for (Value *Arg : CallArgs) + DomainTy.push_back(Arg->getType()); + auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy, + /* isVarArg = */ false); + + auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) { + uint64_t ElementSize = ElementSizeCI->getZExtValue(); + if (IID == Intrinsic::memcpy_element_unordered_atomic) { + switch (ElementSize) { + case 1: + return "__llvm_memcpy_element_unordered_atomic_safepoint_1"; + case 2: + return "__llvm_memcpy_element_unordered_atomic_safepoint_2"; + case 4: + return "__llvm_memcpy_element_unordered_atomic_safepoint_4"; + case 8: + return "__llvm_memcpy_element_unordered_atomic_safepoint_8"; + case 16: + return "__llvm_memcpy_element_unordered_atomic_safepoint_16"; + default: + llvm_unreachable("unexpected element size!"); + } + } + assert(IID == Intrinsic::memmove_element_unordered_atomic); + switch (ElementSize) { + case 1: + return "__llvm_memmove_element_unordered_atomic_safepoint_1"; + case 2: + return "__llvm_memmove_element_unordered_atomic_safepoint_2"; + case 4: + return "__llvm_memmove_element_unordered_atomic_safepoint_4"; + case 8: + return "__llvm_memmove_element_unordered_atomic_safepoint_8"; + case 16: + return "__llvm_memmove_element_unordered_atomic_safepoint_16"; + default: + llvm_unreachable("unexpected element size!"); + } + }; + + CallTarget = + F->getParent() + ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy) + .getCallee(); } } @@ -2584,8 +2679,27 @@ assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision"); auto NeedsRewrite = [&TLI](Instruction &I) { - if (const auto *Call = dyn_cast<CallBase>(&I)) - return !callsGCLeafFunction(Call, TLI) && !isa<GCStatepointInst>(Call); + if (const auto *Call = dyn_cast<CallBase>(&I)) { + if (isa<GCStatepointInst>(Call)) + return false; + if (callsGCLeafFunction(Call, TLI)) + return false; + + // Normally it's up to the frontend to make sure that non-leaf calls also + // have proper deopt state if it is required. We make an exception for + // element atomic memcpy/memmove intrinsics here. Unlike other intrinsics + // these are non-leaf by default. They might be generated by the optimizer + // which doesn't know how to produce a proper deopt state. So if we see a + // non-leaf memcpy/memmove without deopt state just treat it as a leaf + // copy and don't produce a statepoint. + if (!AllowStatepointWithNoDeoptInfo && + !Call->getOperandBundle(LLVMContext::OB_deopt)) { + assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) && + "Don't expect any other calls here!"); + return false; + } + return true; + } return false; }; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2672,10 +2672,13 @@ if (F->hasFnAttribute("gc-leaf-function")) return true; - if (auto IID = F->getIntrinsicID()) + if (auto IID = F->getIntrinsicID()) { // Most LLVM intrinsics do not take safepoints. return IID != Intrinsic::experimental_gc_statepoint && - IID != Intrinsic::experimental_deoptimize; + IID != Intrinsic::experimental_deoptimize && + IID != Intrinsic::memcpy_element_unordered_atomic && + IID != Intrinsic::memmove_element_unordered_atomic; + } } // Lib calls can be materialized by some passes, and won't be diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll @@ -0,0 +1,52 @@ +; RUN: opt -passes=rewrite-statepoints-for-gc -rs4gc-allow-statepoint-with-no-deopt-info=0 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-REQUIRE-DEOPT +; RUN: opt -passes=rewrite-statepoints-for-gc -rs4gc-allow-statepoint-with-no-deopt-info=1 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO-REQUIRE-DEOPT + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg) +declare void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg) + +define void @test_memcpy_no_deopt(i8 addrspace(1)* %src, 
i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: @test_memcpy_no_deopt +; CHECK-REQUIRE-DEOPT-NOT: @llvm.experimental.gc.statepoint +; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) + ret void +} + +define void @test_memmove_no_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: @test_memmove_no_deopt +; CHECK-REQUIRE-DEOPT-NOT: @llvm.experimental.gc.statepoint +; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) + ret void +} + +define void @test_memcpy_with_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: @test_memcpy_with_deopt +; CHECK-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint +; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ] + ret void +} + +define void 
@test_memmove_with_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: @test_memmove_with_deopt +; CHECK-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint +; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ] + ret void +} diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll @@ -0,0 +1,199 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; Use instcombine to cleanup offset computation. 
+; RUN: opt -passes=rewrite-statepoints-for-gc,instcombine -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128-p1:64:64" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg) +declare void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg) + +define void @test_memcpy_gc_leaf_function(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memcpy_gc_leaf_function +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]] +; CHECK-NEXT: [[DEST_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]] +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) [[ATTR2:#.*]] +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) [[ATTR2]] +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) [[ATTR2]] +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) [[ATTR2]] +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* 
align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) [[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function" + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function" + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function" + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function" + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function" + ret void +} + +define void @test_memcpy_element_atomic_1(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_1 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_1, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) + ret void +} + +define void @test_memcpy_element_atomic_2(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_2 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_2, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2) + ret void +} + +define void @test_memcpy_element_atomic_4(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_4 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_4, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4) + ret void +} + +define void @test_memcpy_element_atomic_8(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_8 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_8, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8) + ret void +} + +define void @test_memcpy_element_atomic_16(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_16 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_16, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16) + ret void +} + +define void @test_memmove_gc_leaf_function(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memmove_gc_leaf_function +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]] +; CHECK-NEXT: [[DEST_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]] +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) [[ATTR2]] +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) [[ATTR2]] +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) [[ATTR2]] +; CHECK-NEXT: call void 
@llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) [[ATTR2]] +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) [[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function" + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function" + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function" + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function" + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function" + ret void +} + +define void @test_memmove_element_atomic_1(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_1 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 
addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_1, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) + ret void +} + +define void @test_memmove_element_atomic_2(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_2 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_2, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2) + ret void +} + +define void @test_memmove_element_atomic_4(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_4 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_4, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4) + ret void +} + +define void @test_memmove_element_atomic_8(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_8 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_8, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8) + ret void +} + +define void @test_memmove_element_atomic_16(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" { +; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_16 +; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_16, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ] +; CHECK-NEXT: ret void +; +entry: + %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset + %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset + call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16) + ret void +}