diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26979,6 +26979,10 @@
 as arrays with elements that are exactly ``element_size`` bytes, and the copy
 between buffers uses a sequence of :ref:`unordered atomic <ordering>` load/store
 operations that are a positive integer multiple of the ``element_size`` in size.
+If this intrinsic is called in a function that requires garbage collection
+(:ref:`gc`), then the ``dest`` and ``src`` arguments must have the
+``elementtype`` attribute set. The element type information is needed to
+choose the correct lowering for the intrinsic.

 Arguments:
 """"""""""

@@ -27054,6 +27058,10 @@
 bytes, and the copy between buffers uses a sequence of
 :ref:`unordered atomic <ordering>` load/store operations that are a positive
 integer multiple of the ``element_size`` in size.
+If this intrinsic is called in a function that requires garbage collection
+(:ref:`gc`), then the ``dest`` and ``src`` arguments must have the
+``elementtype`` attribute set. The element type information is needed to
+choose the correct lowering for the intrinsic.

 Arguments:
 """"""""""

diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -681,7 +681,8 @@
       Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size,
       uint32_t ElementSize, MDNode *TBAATag = nullptr,
       MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
-      MDNode *NoAliasTag = nullptr);
+      MDNode *NoAliasTag = nullptr,
+      std::optional<Type *> ElementType = std::nullopt);

   CallInst *CreateMemMove(Value *Dst, MaybeAlign DstAlign, Value *Src,
                           MaybeAlign SrcAlign, uint64_t Size,
@@ -716,7 +717,8 @@
       Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size,
       uint32_t ElementSize, MDNode *TBAATag = nullptr,
       MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
-      MDNode *NoAliasTag = nullptr);
+      MDNode *NoAliasTag = nullptr,
+      std::optional<Type *> ElementTy = std::nullopt);

 private:
   CallInst *getReductionIntrinsic(Intrinsic::ID ID, Value *Src);
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1070,6 +1070,9 @@
 // This class wraps the atomic memcpy/memmove intrinsics
 // i.e. llvm.element.unordered.atomic.memcpy/memmove
 class AtomicMemTransferInst : public MemTransferBase<AtomicMemIntrinsic> {
+private:
+  enum { ARG_DEST = 0, ARG_SOURCE = 1 };
+
 public:
   static bool classof(const IntrinsicInst *I) {
     switch (I->getIntrinsicID()) {
@@ -1083,6 +1086,10 @@
   static bool classof(const Value *V) {
     return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
   }
+  // These accessors return a type only if the AtomicMemTransferInst is
+  // called from a function that has a GC strategy specified.
+  Type *getSourceElementType() const { return getParamElementType(ARG_SOURCE); }
+  Type *getDestElementType() const { return getParamElementType(ARG_DEST); }
 };

 /// This class represents the atomic memcpy intrinsic
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -256,7 +256,7 @@
 CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
     Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size,
     uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag,
-    MDNode *ScopeTag, MDNode *NoAliasTag) {
+    MDNode *ScopeTag, MDNode *NoAliasTag, std::optional<Type *> ElementTy) {
   assert(DstAlign >= ElementSize &&
          "Pointer alignment must be at least element size");
   assert(SrcAlign >= ElementSize &&
@@ -288,13 +288,20 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);

+  if (ElementTy.has_value()) {
+    CI->addParamAttr(0, Attribute::get(CI->getContext(), Attribute::ElementType,
+                                       *ElementTy));
+    CI->addParamAttr(1, Attribute::get(CI->getContext(), Attribute::ElementType,
+                                       *ElementTy));
+  }
+
   return CI;
 }

 CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
     Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size,
     uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag,
-    MDNode *ScopeTag, MDNode *NoAliasTag) {
+    MDNode *ScopeTag, MDNode *NoAliasTag, std::optional<Type *> ElementTy) {
   assert(DstAlign >= ElementSize &&
          "Pointer alignment must be at least element size");
   assert(SrcAlign >= ElementSize &&
@@ -325,6 +332,13 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);

+  if (ElementTy.has_value()) {
+    CI->addParamAttr(0, Attribute::get(CI->getContext(), Attribute::ElementType,
+                                       *ElementTy));
+    CI->addParamAttr(1, Attribute::get(CI->getContext(), Attribute::ElementType,
+                                       *ElementTy));
+  }
+
   return CI;
 }

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5180,6 +5180,17 @@
   }
   case Intrinsic::memcpy_element_unordered_atomic:
   case Intrinsic::memmove_element_unordered_atomic:
+  {
+    const auto *AMTI = cast<AtomicMemTransferInst>(&Call);
+    if (Call.getParent()->getParent()->hasGC()) {
+      Check(AMTI->getSourceElementType() != nullptr,
+            "elementtype required for atomic memory intrinsic with GC support",
+            Call);
+      Check(AMTI->getSourceElementType() == AMTI->getDestElementType(),
+            "expected source and dest element type to be same", Call);
+    }
+  }
+    [[fallthrough]];
   case Intrinsic::memset_element_unordered_atomic: {
     const auto *AMI = cast<AtomicMemIntrinsic>(&Call);
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1413,12 +1413,19 @@
     if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
      return Changed;

+    std::optional<Type *> ElementType = std::nullopt;
+    // Record ElementType for correct lowering of the atomic memcpy if the
+    // function requires GC support.
+    if (TheLoad->getFunction()->hasGC())
+      ElementType = TheLoad->getType();
+
    // Create the call.
    // Note that unordered atomic loads/stores are *required* by the spec to
    // have an alignment but non-atomic loads/stores may not.
    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
        StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
-        AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
+        AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias,
+        ElementType);
   }

   NewCall->setDebugLoc(TheStore->getDebugLoc());
diff --git a/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll b/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
--- a/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
@@ -41,6 +41,44 @@
   ret void
 }

+
+define void @test1_gc(i64 %Size) nounwind ssp gc "statepoint-example" {
+; CHECK-LABEL: @test1_gc(
+; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
+; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr elementtype(i8) align 1 [[DEST]], ptr elementtype(i8) align 1 [[BASE]], i64 [[SIZE:%.*]], i32 1)
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[V:%.*]] = load atomic i8, ptr [[I_0_014]] unordered, align 1
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
+  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
+  %V = load atomic i8, ptr %I.0.014 unordered, align 1
+  store atomic i8 %V, ptr %DestI unordered, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 ;; memcpy.atomic formation (atomic store, normal load)
 define void @test2(i64 %Size) nounwind ssp {
 ; CHECK-LABEL: @test2(
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll
--- a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll
@@ -14,7 +14,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1)
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1)
   ret void
 }

@@ -25,7 +25,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1)
   ret void
 }

@@ -36,7 +36,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ]
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ]
   ret void
 }

@@ -47,6 +47,6 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ]
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ]
   ret void
 }
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll
--- a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll
@@ -14,22 +14,22 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SRC_DERIVED:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[SRC_OFFSET]]
 ; CHECK-NEXT:    [[DEST_DERIVED:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DEST]], i64 [[DEST_OFFSET]]
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) #[[ATTR2]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function"
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function"
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function"
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function"
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function"
   ret void
 }

@@ -43,7 +43,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1)
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1)
   ret void
 }

@@ -57,7 +57,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 2)
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 2)
   ret void
 }

@@ -71,7 +71,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 4)
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 4)
   ret void
 }

@@ -85,7 +85,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 8)
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 8)
   ret void
 }

@@ -99,7 +99,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 16)
+  call void @llvm.memcpy.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 16)
   ret void
 }

@@ -109,22 +109,22 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SRC_DERIVED:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[SRC_OFFSET]]
 ; CHECK-NEXT:    [[DEST_DERIVED:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DEST]], i64 [[DEST_OFFSET]]
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) #[[ATTR2]]
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 [[SRC_DERIVED]], ptr addrspace(1) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) #[[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[SRC_DERIVED]], ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) #[[ATTR2]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function"
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function"
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function"
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function"
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function"
   ret void
 }

@@ -138,7 +138,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 1)
   ret void
 }

@@ -152,7 +152,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 2)
   ret void
 }

@@ -166,7 +166,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 4)
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 4)
   ret void
 }

@@ -180,7 +180,7 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 8)
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 8)
   ret void
 }

@@ -194,6 +194,6 @@
 entry:
   %src_derived = getelementptr inbounds i8, ptr addrspace(1) %src, i64 %src_offset
   %dest_derived = getelementptr inbounds i8, ptr addrspace(1) %dest, i64 %dest_offset
-  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) align 16 %src_derived, ptr addrspace(1) align 16 %dest_derived, i32 %len, i32 16)
+  call void @llvm.memmove.element.unordered.atomic.p1.p1.i32(ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %src_derived, ptr addrspace(1) elementtype(ptr addrspace(1)) align 16 %dest_derived, i32 %len, i32 16)
   ret void
 }
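
A minimal usage sketch of the extended IRBuilder overload, illustrative only and not part of the patch. It mirrors the LoopIdiomRecognize change: the element type is forwarded only when the enclosing function has a GC strategy, so the emitted call carries matching elementtype attributes on the dest and src pointer arguments, as the verifier now requires. The helper name emitAtomicElementCopy and its parameters are hypothetical.

#include <cstdint>
#include <optional>

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Emits llvm.memcpy.element.unordered.atomic copying NumBytes from Src to Dst
// in ElementSize-byte unordered-atomic chunks.
static CallInst *emitAtomicElementCopy(IRBuilderBase &B, Value *Dst, Value *Src,
                                       Value *NumBytes, uint32_t ElementSize,
                                       Type *EltTy) {
  // Only forward the element type when the caller's function uses a GC
  // strategy; passing std::nullopt keeps the previous behavior for non-GC
  // callers (no elementtype attributes are attached).
  std::optional<Type *> ElementType = std::nullopt;
  if (B.GetInsertBlock()->getParent()->hasGC())
    ElementType = EltTy;
  return B.CreateElementUnorderedAtomicMemCpy(
      Dst, Align(ElementSize), Src, Align(ElementSize), NumBytes, ElementSize,
      /*TBAATag=*/nullptr, /*TBAAStructTag=*/nullptr, /*ScopeTag=*/nullptr,
      /*NoAliasTag=*/nullptr, ElementType);
}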