Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -167,24 +167,26 @@ for (User *U : I->users()) { Instruction *UI = cast(U); Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) + IndicesVector Indices; + LoadInst *L = dyn_cast(UI); + if (L) SrcTy = L->getType(); else SrcTy = cast(UI)->getSourceElementType(); - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); - II != IE; ++II) - Indices.push_back(cast(*II)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); + if (L == nullptr) { // not a direct load (loads can have 2 operands) + Indices.reserve(UI->getNumOperands() - 1); + // This will record direct loads without any indices, and gep+loads + // with the GEP indices. + for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + } ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) + if (L) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis @@ -308,6 +310,7 @@ AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); + newLoad->setAAMetadataNoAliasProvenance(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp =================================================================== --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1012,6 +1012,12 @@ case Instruction::Call: case Instruction::Invoke: { CallBase &CB = cast(*RVI); + if (CB.getIntrinsicID() == Intrinsic::noalias_arg_guard || + CB.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // Look through a noalias arg/copy guard + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + } Function *Callee = CB.getCalledFunction(); // A call to a node within the SCC is assumed to return null until // proven otherwise Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -215,6 +215,9 @@ Value *V = SI->getValueOperand(); if (isa(V)) { Changed = true; + // skip potential second use from ptr_provenance before the erase + if ((UI != E) && (*UI == SI)) + UI++; SI->eraseFromParent(); } else if (Instruction *I = dyn_cast(V)) { if (I->hasOneUse()) @@ -794,6 +797,9 @@ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); // If we were able to delete all uses of the loads if (LI->use_empty()) { + // skip potential second use from ptr_provenance before the erase + if ((GUI != E) && (*GUI == LI)) + GUI++; LI->eraseFromParent(); Changed = true; } else { @@ -1281,6 +1287,9 @@ // users. for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from ptr_provenance + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } } @@ -1293,6 +1302,8 @@ std::vector > &PHIsToRewrite) { for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { Instruction *User = cast(*UI++); + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } @@ -1430,6 +1441,9 @@ // of the per-field globals instead. for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from ptr_provenance + if ((UI != E) && (*UI == User)) + ++UI; if (LoadInst *LI = dyn_cast(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4350,6 +4350,22 @@ } break; } + case Intrinsic::noalias_decl: { + Value *Op0 = II->getOperand(0); + Value *BaseOp0 = Op0->stripPointerCasts(); + if (Op0 != BaseOp0) { + auto *NewNoAlias = Builder.CreateNoAliasDeclaration( + BaseOp0, II->getOperand(1), II->getOperand(2)); + // bwaahh seems we need to do everything ourselves ? + II->replaceAllUsesWith(NewNoAlias); + Worklist.remove(II); + II->eraseFromParent(); + MadeIRChange = true; + + return nullptr; + } + break; + } } return visitCallBase(*II); } Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -434,10 +434,20 @@ NewPtr->getType()->getPointerAddressSpace() == AS)) NewPtr = Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)); + Value *NewProvenancePtr = nullptr; + if (LI.hasNoaliasProvenanceOperand()) { + NewProvenancePtr = Builder.CreateBitCast(LI.getNoaliasProvenanceOperand(), + NewTy->getPointerTo(AS)); + } + LoadInst *NewLoad = Builder.CreateAlignedLoad( NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); copyMetadataForLoad(*NewLoad, LI); + if (LI.hasNoaliasProvenanceOperand()) { + assert(NewProvenancePtr); + NewLoad->setNoaliasProvenanceOperand(NewProvenancePtr); + } return NewLoad; } @@ -453,6 +463,12 @@ SmallVector, 8> MD; SI.getAllMetadata(MD); + Value *NewProvenancePtr = nullptr; + if (SI.hasNoaliasProvenanceOperand()) { + NewProvenancePtr = IC.Builder.CreateBitCast( + SI.getNoaliasProvenanceOperand(), V->getType()->getPointerTo(AS)); + } + StoreInst *NewStore = IC.Builder.CreateAlignedStore( V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlign(), SI.isVolatile()); @@ -493,6 +509,11 @@ } } + if (SI.hasNoaliasProvenanceOperand()) { + assert(NewProvenancePtr); + NewStore->setNoaliasProvenanceOperand(NewProvenancePtr); + } + return NewStore; } @@ -575,6 +596,10 @@ // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast(*UI++); + // skip the ptr_provenance + if ((UI != UE) && (*UI == SI)) + ++UI; + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); @@ -657,6 +682,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasProvenance(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); } @@ -706,6 +732,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasProvenance(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; } @@ -918,6 +945,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + if (LI.hasNoaliasProvenanceOperand()) { + if (LI.getNoaliasProvenanceOperand() == LI.getPointerOperand() || + isa(LI.getNoaliasProvenanceOperand())) { + // degenerated ptr_provenance + LI.removeNoaliasProvenanceOperand(); + return &LI; + } + } + // Try to canonicalize the loaded type. if (Instruction *Res = combineLoadToOperationType(*this, LI)) return Res; @@ -1176,6 +1212,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasProvenance(AAMD); } return true; @@ -1224,6 +1261,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasProvenance(AAMD); Offset += EltSize; } @@ -1317,6 +1355,15 @@ Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + if (SI.hasNoaliasProvenanceOperand()) { + if (SI.getNoaliasProvenanceOperand() == SI.getPointerOperand() || + isa(SI.getNoaliasProvenanceOperand())) { + // degenerated ptr_provenance + SI.removeNoaliasProvenanceOperand(); + return &SI; + } + } + // Try to canonicalize the stored type. if (combineStoreToValueType(*this, SI)) return eraseInstFromFunction(SI); @@ -1551,6 +1598,7 @@ if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); + NewSI->setAAMetadataNoAliasProvenance(AATags); } // Nuke the old stores. Index: llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -606,11 +606,8 @@ new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment); unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_range, LLVMContext::MD_invariant_load, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_nonnull, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, @@ -622,14 +619,19 @@ NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); // Add all operands to the new PHI and combine TBAA metadata. + AAMDNodes AAInfo; + FirstLI->getAAMetadata(AAInfo); for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = cast(PN.getIncomingValue(i)); combineMetadata(NewLI, LI, KnownIDs, true); + LI->getAAMetadata(AAInfo, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } + NewLI->setAAMetadata(AAInfo); + NewLI->setAAMetadataNoAliasProvenance(AAInfo); if (InVal) { // The new PHI unions all of the same values together. This is really Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2464,6 +2464,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: + case Intrinsic::noalias_decl: Users.emplace_back(I); continue; } @@ -2906,9 +2907,21 @@ AAMDNodes Nodes; L->getAAMetadata(Nodes); NL->setAAMetadata(Nodes); + NL->setAAMetadataNoAliasProvenance(Nodes); // Returning the load directly will cause the main loop to insert it in - // the wrong spot, so use replaceInstUsesWith(). - return replaceInstUsesWith(EV, NL); + // the wrong spot, so use ReplaceInstUsesWith(). + { + for (auto mdtag : + {LLVMContext::MD_dbg, LLVMContext::MD_prof, LLVMContext::MD_fpmath, + LLVMContext::MD_tbaa_struct, LLVMContext::MD_invariant_load, + LLVMContext::MD_nontemporal, + LLVMContext::MD_mem_parallel_loop_access}) { + if (auto *MD = L->getMetadata(mdtag)) { + NL->setMetadata(mdtag, MD); + } + } + return replaceInstUsesWith(EV, NL); + } } // We could simplify extracts from other values. Note that nested extracts may // already be simplified implicitly by the above: extract (extract (insert) ) Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1344,11 +1344,13 @@ Earlier->getPointerOperand(), false, Earlier->getAlign(), Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); - unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, + unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_nontemporal}; SI->copyMetadata(*DepWrite, MDToKeep); + AAMDNodes AAInfo; + Earlier->getAAMetadata(AAInfo); + SI->setAAMetadata(AAInfo); + SI->setAAMetadataNoAliasProvenance(AAInfo); ++NumModifiedStores; // Delete the old stores and now-dead instructions that feed them. Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -1276,6 +1276,8 @@ LI->getAAMetadata(Tags); if (Tags) NewLoad->setAAMetadata(Tags); + // FIXME: not sure if ptr_provenance propagation should be allowed + // here: dependend provenances should also migrate first ! if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1425,6 +1425,8 @@ NewVal->setDebugLoc(LoadI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); + // FIXME: not sure if ptr_provenance propagation should be allowed + // here: dependend provenances should also migrate first ! AvailablePreds.emplace_back(UnavailablePred, NewVal); } Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1337,6 +1337,7 @@ LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_noalias, LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); @@ -3121,8 +3122,9 @@ StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; PStore->getAAMetadata(AAMD, /*Merge=*/false); - PStore->getAAMetadata(AAMD, /*Merge=*/true); + QStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + SI->setAAMetadataNoAliasProvenance(AAMD); unsigned PAlignment = PStore->getAlignment(); unsigned QAlignment = QStore->getAlignment(); unsigned TypeAlignment = Index: llvm/test/Transforms/FunctionAttrs/nonnull.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -770,5 +770,15 @@ br i1 %11, label %7, label %8 } +; Return a pointer trivially nonnull (argument attribute) through a llvm.noalias.arg.guard +; FNATTR: define nonnull i8* @test_noalias_arg_guard +define i8* @test_noalias_arg_guard(i8* nonnull %p) { + %ret = tail call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %p, i8* %p) + ret i8* %p +} + +; Function Attrs: nounwind readnone +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone + attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} Index: llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll =================================================================== --- llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll +++ llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll @@ -40,10 +40,10 @@ ; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0} ; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0} ; CHECK: ![[RANGE]] = !{i32 10, i32 25} -; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]], ![[SCOPE1:[0-9]+]]} +; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]} ; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"} -; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"} ; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"} +; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"} ; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]} ; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"} Index: llvm/test/Transforms/InstCombine/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/noalias.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:8:8-S32-n32-v128:32:32-P0-p0:32:32:32:32:8-p10:32:32:32:32:8-p20:32:32:32:32:8" + +@__const.f1.i = private unnamed_addr constant { i8*, [4 x i8] } { i8* null, [4 x i8] undef }, align 4 + +declare void @do_something(i8*) + +; Function Attrs: nounwind optsize +define dso_local i64 @test01() local_unnamed_addr #0 { +entry: + %i.sroa.6 = alloca [2 x i8], align 2 + %i.sroa.6.0..sroa_idx6 = getelementptr inbounds [2 x i8], [2 x i8]* %i.sroa.6, i32 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + %0 = call i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]* %i.sroa.6, i32 2, metadata !2) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %i.sroa.6.0..sroa_idx6, i8* align 2 getelementptr inbounds (i8, i8* bitcast ({ i8*, [4 x i8] }* @__const.f1.i to i8*), i32 2), i32 2, i1 false) + %i.sroa.6.cast = bitcast [2 x i8]* %i.sroa.6 to i8* + call void @do_something(i8* %i.sroa.6.cast) + call void @llvm.lifetime.end.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + ret i64 undef +} + +; CHECK-LABEL: @test01( +; CHECK: %i.sroa.6 = alloca i16, align 2 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0i16.i32(i16* nonnull %i.sroa.6, i32 2, metadata !2) + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]*, i32, metadata) #1 + +attributes #0 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"f1: i"} +!4 = distinct !{!4, !"f1"}