Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -167,24 +167,26 @@ for (User *U : I->users()) { Instruction *UI = cast(U); Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) + IndicesVector Indices; + LoadInst *L = dyn_cast(UI); + if (L) SrcTy = L->getType(); else SrcTy = cast(UI)->getSourceElementType(); - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); - II != IE; ++II) - Indices.push_back(cast(*II)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); + if (L == nullptr) { // not a direct load (loads can have 2 operands) + Indices.reserve(UI->getNumOperands() - 1); + // This will record direct loads without any indices, and gep+loads + // with the GEP indices. + for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + } ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) + if (L) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis @@ -309,6 +311,7 @@ AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); + newLoad->setAAMetadataNoAliasSideChannel(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp =================================================================== --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1013,6 +1013,12 @@ case Instruction::Call: case Instruction::Invoke: { CallSite CS(RVI); + if (CS.getIntrinsicID() == Intrinsic::noalias_arg_guard || + CS.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // Look through a noalias arg/copy guard + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + } Function *Callee = CS.getCalledFunction(); // A call to a node within the SCC is assumed to return null until // proven otherwise Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -211,6 +211,9 @@ Value *V = SI->getValueOperand(); if (isa(V)) { Changed = true; + // skip potential second use from noalias_sidechannel before the erase + if ((UI != E) && (*UI == SI)) + UI++; SI->eraseFromParent(); } else if (Instruction *I = dyn_cast(V)) { if (I->hasOneUse()) @@ -763,6 +766,9 @@ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); // If we were able to delete all uses of the loads if (LI->use_empty()) { + // skip potential second use from noalias_sidechannel before the erase + if ((GUI != E) && (*GUI == LI)) + GUI++; LI->eraseFromParent(); Changed = true; } else { @@ -1250,6 +1256,9 @@ // users. for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from noalias_sidechannel + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } } @@ -1262,6 +1271,8 @@ std::vector > &PHIsToRewrite) { for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { Instruction *User = cast(*UI++); + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } @@ -1399,6 +1410,9 @@ // of the per-field globals instead. for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from noalias_sidechannel + if ((UI != E) && (*UI == User)) + ++UI; if (LoadInst *LI = dyn_cast(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4046,6 +4046,22 @@ } break; } + case Intrinsic::noalias_decl: { + Value *Op0 = II->getOperand(0); + Value *BaseOp0 = Op0->stripPointerCasts(); + if (Op0 != BaseOp0) { + auto *NewNoAlias = Builder.CreateNoAliasDeclaration( + BaseOp0, II->getOperand(1), II->getOperand(2)); + // bwaahh seems we need to do everything ourselves ? + II->replaceAllUsesWith(NewNoAlias); + Worklist.Remove(II); + II->eraseFromParent(); + MadeIRChange = true; + + return nullptr; + } + break; + } } return visitCallBase(*II); } Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -462,10 +462,20 @@ NewPtr->getType()->getPointerAddressSpace() == AS)) NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)); + Value *NewSidePtr = nullptr; + if (LI.hasNoaliasSideChannelOperand()) { + NewSidePtr = IC.Builder.CreateBitCast(LI.getNoaliasSideChannelOperand(), + NewTy->getPointerTo(AS)); + } + LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); copyMetadataForLoad(*NewLoad, LI); + if (LI.hasNoaliasSideChannelOperand()) { + assert(NewSidePtr); + NewLoad->setNoaliasSideChannelOperand(NewSidePtr); + } return NewLoad; } @@ -481,6 +491,12 @@ SmallVector, 8> MD; SI.getAllMetadata(MD); + Value *NewSidePtr = nullptr; + if (SI.hasNoaliasSideChannelOperand()) { + NewSidePtr = IC.Builder.CreateBitCast(SI.getNoaliasSideChannelOperand(), + V->getType()->getPointerTo(AS)); + } + StoreInst *NewStore = IC.Builder.CreateAlignedStore( V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlignment(), SI.isVolatile()); @@ -521,6 +537,11 @@ } } + if (SI.hasNoaliasSideChannelOperand()) { + assert(NewSidePtr); + NewStore->setNoaliasSideChannelOperand(NewSidePtr); + } + return NewStore; } @@ -602,6 +623,10 @@ // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast(*UI++); + // skip the noalias_sidechannel + if ((UI != UE) && (*UI == SI)) + ++UI; + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); @@ -687,6 +712,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasSideChannel(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); } @@ -737,6 +763,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasSideChannel(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; } @@ -949,6 +976,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + if (LI.hasNoaliasSideChannelOperand() && + (LI.getNoaliasSideChannelOperand() == LI.getPointerOperand())) { + // degenerated side-channel + LI.removeNoaliasSideChannelOperand(); + return &LI; + } + // Try to canonicalize the loaded type. if (Instruction *Res = combineLoadToOperationType(*this, LI)) return Res; @@ -1219,6 +1253,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasSideChannel(AAMD); } return true; @@ -1269,6 +1304,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasSideChannel(AAMD); Offset += EltSize; } @@ -1351,6 +1387,13 @@ Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + if (SI.hasNoaliasSideChannelOperand() && + (SI.getNoaliasSideChannelOperand() == SI.getPointerOperand())) { + // degenerated side-channel + SI.removeNoaliasSideChannelOperand(); + return &SI; + } + // Try to canonicalize the stored type. if (combineStoreToValueType(*this, SI)) return eraseInstFromFunction(SI); @@ -1591,6 +1634,7 @@ if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); + NewSI->setAAMetadataNoAliasSideChannel(AATags); } // Nuke the old stores. Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2752,9 +2752,27 @@ AAMDNodes Nodes; L->getAAMetadata(Nodes); NL->setAAMetadata(Nodes); + NL->setAAMetadataNoAliasSideChannel(Nodes); // Returning the load directly will cause the main loop to insert it in - // the wrong spot, so use replaceInstUsesWith(). - return replaceInstUsesWith(EV, NL); + // the wrong spot, so use ReplaceInstUsesWith(). + { + for (auto mdtag : { + LLVMContext::MD_dbg, + LLVMContext::MD_tbaa, + LLVMContext::MD_prof, + LLVMContext::MD_fpmath, + LLVMContext::MD_tbaa_struct, + LLVMContext::MD_invariant_load, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_nontemporal, + LLVMContext::MD_mem_parallel_loop_access }) { + if(auto *MD = L->getMetadata(mdtag)) { + NL->setMetadata(mdtag, MD); + } + } + return replaceInstUsesWith(EV, NL); + } } // We could simplify extracts from other values. Note that nested extracts may // already be simplified implicitly by the above: extract (extract (insert) ) Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -1252,6 +1252,8 @@ LI->getAAMetadata(Tags); if (Tags) NewLoad->setAAMetadata(Tags); + // FIXME: not sure if noalias_sidechannel propagation should be allowed + // here: dependend side channels should also migrate first ! if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1477,6 +1477,8 @@ NewVal->setDebugLoc(LoadI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); + // FIXME: not sure if noalias_sidechannel propagation should be allowed + // here: dependend side channels should also migrate first ! AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1315,6 +1315,7 @@ LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_noalias, LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); @@ -3072,8 +3073,9 @@ StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; PStore->getAAMetadata(AAMD, /*Merge=*/false); - PStore->getAAMetadata(AAMD, /*Merge=*/true); + QStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + SI->setAAMetadataNoAliasSideChannel(AAMD); unsigned PAlignment = PStore->getAlignment(); unsigned QAlignment = QStore->getAlignment(); unsigned TypeAlignment = Index: llvm/test/Transforms/FunctionAttrs/nonnull.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -470,4 +470,14 @@ ret void } +; Return a pointer trivially nonnull (argument attribute) through a llvm.noalias.arg.guard +define i8* @test_noalias_arg_guard(i8* nonnull %p) { +; BOTH: define nonnull i8* @test_noalias_arg_guard + %ret = tail call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %p, i8* %p) + ret i8* %p +} + +; Function Attrs: nounwind readnone +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone + attributes #0 = { "null-pointer-is-valid"="true" } Index: llvm/test/Transforms/InstCombine/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/noalias.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:8:8-S32-n32-v128:32:32-P0-p0:32:32:32:32:8-p10:32:32:32:32:8-p20:32:32:32:32:8" + +@__const.f1.i = private unnamed_addr constant { i8*, [4 x i8] } { i8* null, [4 x i8] undef }, align 4 + +; Function Attrs: nounwind optsize +define dso_local i64 @test01() local_unnamed_addr #0 { +entry: + %i.sroa.6 = alloca [2 x i8], align 2 + %i.sroa.6.0..sroa_idx6 = getelementptr inbounds [2 x i8], [2 x i8]* %i.sroa.6, i32 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + %0 = call i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]* %i.sroa.6, i32 2, metadata !2) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %i.sroa.6.0..sroa_idx6, i8* align 2 getelementptr inbounds (i8, i8* bitcast ({ i8*, [4 x i8] }* @__const.f1.i to i8*), i32 2), i32 2, i1 false) + call void @llvm.lifetime.end.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + ret i64 undef +} + +; CHECK-LABEL: @test01( +; CHECK: %i.sroa.6 = alloca i16, align 2 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0i16.i32(i16* nonnull %i.sroa.6, i32 2, metadata !2) + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]*, i32, metadata) #1 + +attributes #0 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"f1: i"} +!4 = distinct !{!4, !"f1"}