Index: include/llvm/IR/IRBuilder.h
===================================================================
--- include/llvm/IR/IRBuilder.h
+++ include/llvm/IR/IRBuilder.h
@@ -436,7 +436,11 @@
                          bool isVolatile = false, MDNode *TBAATag = nullptr,
                          MDNode *TBAAStructTag = nullptr,
                          MDNode *ScopeTag = nullptr,
-                         MDNode *NoAliasTag = nullptr);
+                         MDNode *NoAliasTag = nullptr) {
+    return CreateMemCpyOrMemMove(Intrinsic::memcpy, Dst, DstAlign, Src,
+                                 SrcAlign, Size, isVolatile, TBAATag,
+                                 TBAAStructTag, ScopeTag, NoAliasTag);
+  }
 
   // TODO: Old API. Remove this when no longer used.
   CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
@@ -479,7 +483,12 @@
       Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
       Value *Size, uint32_t ElementSize, MDNode *TBAATag = nullptr,
       MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
-      MDNode *NoAliasTag = nullptr);
+      MDNode *NoAliasTag = nullptr) {
+    return CreateElementUnorderedAtomicMemCpyOrMemMove(
+        Intrinsic::memcpy_element_unordered_atomic, Dst, DstAlign, Src,
+        SrcAlign, Size, ElementSize, TBAATag, TBAAStructTag, ScopeTag,
+        NoAliasTag);
+  }
 
   /// \brief Create and insert a memmove between the specified
   /// pointers.
@@ -495,10 +504,16 @@
                          TBAATag, ScopeTag, NoAliasTag);
   }
 
-  CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
-                          Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr,
+  CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src,
+                          unsigned SrcAlign, Value *Size,
+                          bool isVolatile = false, MDNode *TBAATag = nullptr,
                           MDNode *ScopeTag = nullptr,
-                          MDNode *NoAliasTag = nullptr);
+                          MDNode *NoAliasTag = nullptr) {
+    return CreateMemCpyOrMemMove(Intrinsic::memmove, Dst, DstAlign, Src,
+                                 SrcAlign, Size, isVolatile, TBAATag,
+                                 /*TBAAStructTag*/ nullptr, ScopeTag,
+                                 NoAliasTag);
+  }
 
   // TODO: Old API. Remove this when no longer used.
   CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
@@ -517,6 +532,34 @@
                           ScopeTag, NoAliasTag);
   }
 
+  /// \brief Create and insert an element unordered-atomic memmove between the
+  /// specified pointers.
+  ///
+  /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers,
+  /// respectively.
+  ///
+  /// If the pointers aren't i8*, they will be converted. If a TBAA tag is
+  /// specified, it will be added to the instruction. Likewise with alias.scope
+  /// and noalias tags.
+  CallInst *CreateElementUnorderedAtomicMemMove(
+      Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
+      uint64_t Size, uint32_t ElementSize, MDNode *TBAATag = nullptr,
+      MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) {
+    return CreateElementUnorderedAtomicMemMove(Dst, DstAlign, Src, SrcAlign,
+                                               getInt64(Size), ElementSize,
+                                               TBAATag, ScopeTag, NoAliasTag);
+  }
+
+  CallInst *CreateElementUnorderedAtomicMemMove(
+      Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, Value *Size,
+      uint32_t ElementSize, MDNode *TBAATag = nullptr,
+      MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) {
+    return CreateElementUnorderedAtomicMemCpyOrMemMove(
+        Intrinsic::memmove_element_unordered_atomic, Dst, DstAlign, Src,
+        SrcAlign, Size, ElementSize, TBAATag, /*TBAAStructTag*/ nullptr,
+        ScopeTag, NoAliasTag);
+  }
+
   /// \brief Create a vector fadd reduction intrinsic of the source vector.
   /// The first parameter is a scalar accumulator value for ordered reductions.
   CallInst *CreateFAddReduce(Value *Acc, Value *Src);
@@ -685,6 +728,18 @@
                                   ArrayRef<Type *> OverloadedTypes,
                                   const Twine &Name = "");
 
+  CallInst *CreateMemCpyOrMemMove(Intrinsic::ID ID, Value *Dst,
+                                  unsigned DstAlign, Value *Src,
+                                  unsigned SrcAlign, Value *Size,
+                                  bool isVolatile, MDNode *TBAATag,
+                                  MDNode *TBAAStructTag, MDNode *ScopeTag,
+                                  MDNode *NoAliasTag);
+
+  CallInst *CreateElementUnorderedAtomicMemCpyOrMemMove(
+      Intrinsic::ID ID, Value *Dst, unsigned DstAlign, Value *Src,
+      unsigned SrcAlign, Value *Size, uint32_t ElementSize, MDNode *TBAATag,
+      MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag);
+
   Value *getCastedInt8PtrValue(Value *Ptr);
 };
 
Index: lib/IR/IRBuilder.cpp
===================================================================
--- lib/IR/IRBuilder.cpp
+++ lib/IR/IRBuilder.cpp
@@ -107,27 +107,31 @@
   return CI;
 }
 
-CallInst *IRBuilderBase::
-CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
-             Value *Size, bool isVolatile, MDNode *TBAATag,
-             MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
-  assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) && "Must be 0 or a power of 2");
-  assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) && "Must be 0 or a power of 2");
+CallInst *IRBuilderBase::CreateMemCpyOrMemMove(
+    Intrinsic::ID ID, Value *Dst, unsigned DstAlign, Value *Src,
+    unsigned SrcAlign, Value *Size, bool isVolatile, MDNode *TBAATag,
+    MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
+  assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove) &&
+         "invalid intrinsic ID");
+  assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) &&
+         "Must be 0 or a power of 2");
+  assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) &&
+         "Must be 0 or a power of 2");
   Dst = getCastedInt8PtrValue(Dst);
   Src = getCastedInt8PtrValue(Src);
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
-  Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+  Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
-  
+  Value *TheFn = Intrinsic::getDeclaration(M, ID, Tys);
+
   CallInst *CI = createCallHelper(TheFn, Ops, this);
 
-  auto* MCI = cast<MemCpyInst>(CI);
+  auto *MTI = cast<MemTransferInst>(CI);
   if (DstAlign > 0)
-    MCI->setDestAlignment(DstAlign);
+    MTI->setDestAlignment(DstAlign);
   if (SrcAlign > 0)
-    MCI->setSourceAlignment(SrcAlign);
+    MTI->setSourceAlignment(SrcAlign);
 
   // Set the TBAA info if present.
   if (TBAATag)
@@ -136,20 +140,23 @@
   // Set the TBAA Struct info if present.
   if (TBAAStructTag)
     CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
-  
+
   if (ScopeTag)
     CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
-  
+
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
-  return CI; 
+  return CI;
 }
 
-CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
-    Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, Value *Size,
-    uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag,
-    MDNode *ScopeTag, MDNode *NoAliasTag) {
+CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpyOrMemMove(
+    Intrinsic::ID ID, Value *Dst, unsigned DstAlign, Value *Src,
+    unsigned SrcAlign, Value *Size, uint32_t ElementSize, MDNode *TBAATag,
+    MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
+  assert((ID == Intrinsic::memcpy_element_unordered_atomic ||
+          ID == Intrinsic::memmove_element_unordered_atomic) &&
+         "invalid intrinsic ID");
   assert(DstAlign >= ElementSize &&
          "Pointer alignment must be at least element size");
   assert(SrcAlign >= ElementSize &&
@@ -160,15 +167,14 @@
   Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
   Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(
-      M, Intrinsic::memcpy_element_unordered_atomic, Tys);
+  Value *TheFn = Intrinsic::getDeclaration(M, ID, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
 
   // Set the alignment of the pointer args.
-  auto *AMCI = cast<AtomicMemCpyInst>(CI);
-  AMCI->setDestAlignment(DstAlign);
-  AMCI->setSourceAlignment(SrcAlign);
+  auto *AMTI = cast<AtomicMemTransferInst>(CI);
+  AMTI->setDestAlignment(DstAlign);
+  AMTI->setSourceAlignment(SrcAlign);
 
   // Set the TBAA info if present.
   if (TBAATag)
@@ -187,41 +193,6 @@
   return CI;
 }
 
-CallInst *IRBuilderBase::
-CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
-              Value *Size, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
-              MDNode *NoAliasTag) {
-  assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) && "Must be 0 or a power of 2");
-  assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) && "Must be 0 or a power of 2");
-  Dst = getCastedInt8PtrValue(Dst);
-  Src = getCastedInt8PtrValue(Src);
-
-  Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
-  Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
-
-  CallInst *CI = createCallHelper(TheFn, Ops, this);
-
-  auto *MMI = cast<MemMoveInst>(CI);
-  if (DstAlign > 0)
-    MMI->setDestAlignment(DstAlign);
-  if (SrcAlign > 0)
-    MMI->setSourceAlignment(SrcAlign);
-
-  // Set the TBAA info if present.
-  if (TBAATag)
-    CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
-  if (ScopeTag)
-    CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
-
-  if (NoAliasTag)
-    CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
-
-  return CI;
-}
-
 static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
                                        Value *Src) {
   Module *M = Builder->GetInsertBlock()->getParent()->getParent();
Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -151,7 +151,7 @@
     Memset,
     MemsetPattern,
     MemcpyOrMemmove,
-    UnorderedAtomicMemcpy,
+    UnorderedAtomicMemcpyOrMemmove,
     DontUse // Dummy retval never to be used. Allows catching errors in retval
             // handling.
   };
@@ -480,12 +483,10 @@
     if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
       return LegalStoreKind::None;
 
-    if (UnorderedAtomic || LI->isAtomic())
-      return HasMemcpy ? LegalStoreKind::UnorderedAtomicMemcpy
-                       : LegalStoreKind::None;
-    // Success. This store can be converted into a memcpy or memmove.
-    return LegalStoreKind::MemcpyOrMemmove;
+    return (UnorderedAtomic || LI->isAtomic())
+               ? LegalStoreKind::UnorderedAtomicMemcpyOrMemmove
+               : LegalStoreKind::MemcpyOrMemmove;
   }
 
   // This store can't be transformed into a memset/memcpy.
   return LegalStoreKind::None;
@@ -516,7 +514,7 @@
       StoreRefsForMemsetPattern[Ptr].push_back(SI);
     } break;
     case LegalStoreKind::MemcpyOrMemmove:
-    case LegalStoreKind::UnorderedAtomicMemcpy:
+    case LegalStoreKind::UnorderedAtomicMemcpyOrMemmove:
       StoreRefsForMemcpyOrMemmove.push_back(SI);
       break;
     default:
@@ -1052,19 +1050,7 @@
   // Check whether to generate an unordered atomic memcpy:
   // If the load or store are atomic, then they must necessarily be unordered
   // by previous checks.
-  bool IsAtomicLoadOrStore = SI->isAtomic() || LI->isAtomic();
-
-  // FIXME: We should build an atomic memmove lowering like we have for
-  // memcpy.
-  assert((!IsAtomicLoadOrStore || !PerformMemmove) &&
-         "cannot memmove atomic load or store");
-
-  if (PerformMemmove)
-    NewCall = Builder.CreateMemMove(StoreBasePtr, Align, LoadBasePtr, Align,
-                                    NumBytes);
-  else if (!IsAtomicLoadOrStore)
-    NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
-  else {
+  if (SI->isAtomic() || LI->isAtomic()) {
     // We cannot allow unaligned ops for unordered load/store, so reject
    // anything where the alignment isn't at least the element size.
     if (Align < StoreSize)
@@ -1080,10 +1066,20 @@
 
     // Create the call.
     // Note that unordered atomic loads/stores are *required* by the spec to
     // have an alignment but non-atomic loads/stores may not.
-    NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
-        StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
-        NumBytes, StoreSize);
-  }
+    if (PerformMemmove)
+      NewCall = Builder.CreateElementUnorderedAtomicMemMove(
+          StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
+          NumBytes, StoreSize);
+    else
+      NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
+          StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
+          NumBytes, StoreSize);
+  } else if (PerformMemmove)
+    NewCall = Builder.CreateMemMove(StoreBasePtr, Align, LoadBasePtr, Align,
+                                    NumBytes);
+  else
+    NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
+
   NewCall->setDebugLoc(SI->getDebugLoc());
 
Index: test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
===================================================================
--- test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
+++ test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
@@ -454,3 +454,58 @@
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+;; memmove.atomic formation (atomic load & store)
+define void @test_memmove_one(i8* readonly %begin, i8* readnone %end, i8* nocapture %out) nounwind ssp {
+entry:
+  %cmp1 = icmp eq i8* %begin, %end
+  br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %dest.i = phi i8* [ %dest.next, %for.body ], [ %out, %for.body.preheader ]
+  %begin.i = phi i8* [ %begin.next, %for.body ], [ %begin, %for.body.preheader ]
+  %0 = load atomic i8, i8* %begin.i unordered, align 1
+  store atomic i8 %0, i8* %dest.i unordered, align 1
+  %begin.next = getelementptr inbounds i8, i8* %begin.i, i64 1
+  %dest.next = getelementptr inbounds i8, i8* %dest.i, i64 1
+  %cmp = icmp eq i8* %begin.next, %end
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK-LABEL: @test_memmove_one(
+; CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %begin,
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+
+;; memmove.atomic formation (atomic store & normal load)
+define void @test_memmove_two(i8* readonly %begin, i8* readnone %end, i8* nocapture %out) nounwind ssp {
+entry:
+  %cmp1 = icmp eq i8* %begin, %end
+  br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %dest.i = phi i8* [ %dest.next, %for.body ], [ %out, %for.body.preheader ]
+  %begin.i = phi i8* [ %begin.next, %for.body ], [ %begin, %for.body.preheader ]
+  %0 = load i8, i8* %begin.i, align 1
+  store atomic i8 %0, i8* %dest.i unordered, align 1
+  %begin.next = getelementptr inbounds i8, i8* %begin.i, i64 1
+  %dest.next = getelementptr inbounds i8, i8* %dest.i, i64 1
+  %cmp = icmp eq i8* %begin.next, %end
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
; CHECK-LABEL: @test_memmove_two(
+; CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %begin,
+; CHECK-NOT: store
+; CHECK: ret void
+}
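
Usage sketch (not part of the patch): how a client might exercise the two new public entry points. The wrapper function and the alignment/size constants below are hypothetical; only the CreateMemMove and CreateElementUnorderedAtomicMemMove calls are the APIs this patch adds or reroutes.

    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // Emit both flavors of memmove through the new IRBuilder entry points.
    static void emitMoves(IRBuilder<> &B, Value *Dst, Value *Src) {
      // Plain memmove: a DstAlign/SrcAlign of 0 means "alignment unknown";
      // nonzero values must be powers of two (asserted in the builder).
      B.CreateMemMove(Dst, /*DstAlign=*/4, Src, /*SrcAlign=*/4,
                      /*Size=*/B.getInt64(64));

      // Element-wise unordered-atomic memmove: both alignments must be at
      // least ElementSize, and Size must be a multiple of ElementSize.
      B.CreateElementUnorderedAtomicMemMove(Dst, /*DstAlign=*/4, Src,
                                            /*SrcAlign=*/4, /*Size=*/64,
                                            /*ElementSize=*/4);
    }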
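
For reference, the new tests collapse the byte-copy loop into a single call of roughly the following shape. This IR is illustrative only: the length argument is computed by the pass from the loop bounds, the i32 1 element size matches the i8 accesses, and the declaration follows the existing element.unordered.atomic intrinsic family.

    declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32)

    define void @moved(i8* %out, i8* %begin, i64 %len) {
      ; One unordered-atomic move of %len bytes in 1-byte elements.
      call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %begin, i64 %len, i32 1)
      ret void
    }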