Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -780,6 +780,13 @@ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + if (U->getOperandNo() == LI.getNoaliasSideChannelOperandIndex()) { + // Skip side channel + assert(LI.hasNoaliasSideChannelOperand() && + LI.getNoaliasSideChannelOperand() == *U); + return; + } + if (!IsOffsetKnown) return PI.setAborted(&LI); @@ -792,6 +799,13 @@ } void visitStoreInst(StoreInst &SI) { + if (U->getOperandNo() == SI.getNoaliasSideChannelOperandIndex()) { + // Skip side channel + assert(SI.hasNoaliasSideChannelOperand() && + SI.getNoaliasSideChannelOperand() == *U); + return; + } + Value *ValOp = SI.getValueOperand(); if (ValOp == *U) return PI.setEscapedAndAborted(&SI); @@ -937,6 +951,57 @@ insertUse(II, Offset, Size, true); return; } + // look through noalias intrinsics + if (II.getIntrinsicID() == Intrinsic::noalias_decl) { + insertUse(II, Offset, AllocSize, true); + // do not enqueue direct users (?) 
They should be handled through a + // dependency on the original alloca + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (U->getOperandNo() == Intrinsic::NoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()), + false); + return; + } + if (U->getOperandNo() == 0) { + assert(II.getOperand(0) == *U); + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::side_noalias) { + if (U->getOperandNo() == Intrinsic::SideNoAliasIdentifyPArg) { + insertUse( + II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::SideNoAliasIdentifyPArg)->getType()), + false); + return; + } + // hmmm - do not look through the first argument for a llvm.side.noalias + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_arg_guard) { + if (U->getOperandNo() == 0) { + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG( + llvm::dbgs() + << "AllocaSlices::SliceBuilder: handling llvm.noalias.copy.guard:" + << (U->getOperandNo() == 0) << ":" << II << "\n"); + // Identify the usage, so that it can be split + if (U->getOperandNo() == 0) + enqueueUsers(II); + return; + } Base::visitIntrinsicInst(II); } @@ -1261,7 +1326,15 @@ LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); LoadInst *SomeLoad = cast(PN.user_back()); - Type *LoadTy = SomeLoad->getType(); + if (SomeLoad->getPointerOperand() != &PN) { + // this must be the side channel -> ignore the speculation for now + LLVM_DEBUG(llvm::dbgs() + << " not speculating dependency on side channel: " + << *SomeLoad << "\n"); + return; + } + + Type *LoadTy = cast(PN.getType())->getElementType(); IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -2272,6 +2345,9 @@ const uint64_t 
NewAllocaBeginOffset, NewAllocaEndOffset; Type *NewAllocaTy; + IntrinsicInst *OldNoAliasDecl = nullptr; + IntrinsicInst *NewNoAliasDecl = nullptr; + // This is a convenience and flag variable that will be null unless the new // alloca's integer operations should be widened to this integer type due to // passing isIntegerWideningViable above. If it is non-null, the desired @@ -2342,6 +2418,7 @@ ++NumVectorized; } assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy)); + prepareNoAliasDecl(); } bool visit(AllocaSlices::const_iterator I) { @@ -3061,11 +3138,126 @@ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) Store->setAAMetadata(AATags); - LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "(3) to: " << *Store << "\n"); return !II.isVolatile(); } - bool visitIntrinsicInst(IntrinsicInst &II) { + void prepareNoAliasDecl() { + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + for (auto U : OldAI.users()) { + IntrinsicInst *II = dyn_cast(U); + if (II && II->getIntrinsicID() == Intrinsic::noalias_decl) { + if (OldNoAliasDecl) { + // We already found a llvm.noalias.decl - leave it up to the visitor to + // propagate + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + break; + } + IRB.SetInsertPoint(II); + IRB.SetCurrentDebugLocation(II->getDebugLoc()); + IRB.SetNamePrefix(Twine(NewAI.getName()) + ".noalias.decl."); + + OldNoAliasDecl = II; + LLVM_DEBUG(dbgs() << "Found llvm.noalias.decl: " << *II << "\n"); + ConstantInt *OldId = cast( + II->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + NewNoAliasDecl = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II->getArgOperand(2))); + LLVM_DEBUG(dbgs() << "New llvm.noalias.decl: " << *NewNoAliasDecl + << "\n"); + // continue - it is possible we see multiple llvm.noalias.decl! 
+ } + } + } + + bool visitNoAliasDeclIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_decl); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + Value *New; + if (OldNoAliasDecl) { + assert(OldNoAliasDecl == &II); + assert(NewNoAliasDecl != nullptr); + New = NewNoAliasDecl; + } else { + assert(NewNoAliasDecl == nullptr); + ConstantInt *OldId = + cast(II.getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + New = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II.getArgOperand(2))); + } + (void)New; + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); + + // Record this instruction for deletion. + Pass.DeadInsts.insert(&II); + + // nothing else to do - preparation was already done + return true; + } + + bool visitSideNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::side_noalias); + assert(II.getArgOperand(Intrinsic::SideNoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::SideNoAliasNoAliasDeclArg) == + OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::SideNoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::SideNoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, + II.getArgOperand(Intrinsic::SideNoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = + II.getArgOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias); + 
assert(II.getArgOperand(Intrinsic::NoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg) == OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::NoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::NoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::NoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = II.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasCopyGuardIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard); + return true; + } + + bool visitLifetimeIntrinsicInst(IntrinsicInst &II) { assert(II.isLifetimeStartOrEnd()); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getArgOperand(1) == OldPtr); @@ -3103,6 +3295,25 @@ return true; } + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return visitLifetimeIntrinsicInst(II); + case Intrinsic::noalias_decl: + return visitNoAliasDeclIntrinsicInst(II); + case Intrinsic::noalias: + return visitNoAliasIntrinsicInst(II); + case Intrinsic::side_noalias: + return visitSideNoAliasIntrinsicInst(II); + case Intrinsic::noalias_copy_guard: + return visitNoAliasCopyGuardIntrinsicInst(II); + default: + assert(false && "SROA: SliceRewriter: unhandled intrinsic"); + return false; + } + } + void fixLoadStoreAlign(Instruction &Root) { // This algorithm implements the same visitor loop as // 
hasUnsafePHIOrSelectUse, and fixes the alignment of each load @@ -3202,6 +3413,66 @@ }; namespace { +static llvm::Instruction *introduceNoAliasWhenCopyGuardIndicesAreCompatible( + llvm::LoadInst *Load, llvm::Instruction *CopyGuardII) { + assert(CopyGuardII); + GetElementPtrInst *GEP = + dyn_cast(Load->getPointerOperand()); + assert(GEP && "load of llvm.noalias.copy.guard without GEP ??"); + + bool indicesAreCompatible = false; + MDNode *CopyGuardIndices = cast( + cast( + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + if (IMD->getNumOperands() != GEP->getNumIndices()) { + indicesAreCompatible = false; + } else { + unsigned index = 0; + indicesAreCompatible = true; + for (Value *Index : GEP->indices()) { + const MDOperand &MDIndex = IMD->getOperand(index); + ++index; + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isMinusOne()) + continue; // accept any index at this place + ConstantInt *C_rhs = dyn_cast(Index); + if ((C_rhs == nullptr) || + (C_lhs->getSExtValue() != + C_rhs->getSExtValue())) { // compare int64 - the ConstantInt can + // have different types + indicesAreCompatible = false; + break; + } + } + } + if (indicesAreCompatible) { + break; + } + } + } + if (indicesAreCompatible) { + IRBuilderTy IRB(Load->getNextNode()); + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? 
+ auto NoAlias = IRB.CreateNoAliasPointer( + Load, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), GEP, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + Load->getName() + ".noalias"); + // juggle around + Load->replaceAllUsesWith(NoAlias); + NoAlias->setOperand(0, Load); + LLVM_DEBUG(llvm::dbgs() + << " - compatible, introduced:" << *NoAlias << "\n"); + return NoAlias; + } + + return Load; +} /// Visitor to rewrite aggregate loads and stores as scalar. /// @@ -3211,6 +3482,7 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class InstVisitor; + typedef InstVisitor Base; /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3345,38 +3617,76 @@ struct LoadOpSplitter : public OpSplitter { AAMDNodes AATags; + Instruction *CopyGuardII = nullptr; + unsigned CGIIndex = 0; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, - AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL) + AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL, + Instruction *CopyGuardII_) : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, - DL), AATags(AATags) {} + DL), + AATags(AATags), CopyGuardII(CopyGuardII_) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. 
+ auto Ptr = this->Ptr; // Make sure _NOT_ to overwrite the Ptr member + if (CopyGuardII) { + assert(CopyGuardII == Ptr && "Ptr != CopyGuardII ???"); + Ptr = CopyGuardII->getOperand(0); // look through noalias.copy.guard + } Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load"); + Instruction *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load"); if (AATags) Load->setAAMetadata(AATags); + if (CopyGuardII && Ty->isPointerTy()) { + Load = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + cast(Load), CopyGuardII); + } Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } }; bool visitLoadInst(LoadInst &LI) { + if (U->getOperandNo() == LI.getNoaliasSideChannelOperandIndex()) { + // Skip side channel + assert(LI.hasNoaliasSideChannelOperand() && + LI.getNoaliasSideChannelOperand() == *U); + return false; + } assert(LI.getPointerOperand() == *U); - if (!LI.isSimple() || LI.getType()->isSingleValueType()) + Instruction *CopyGuardII = nullptr; + { + Value *BasePtr = LI.getPointerOperand()->stripInBoundsOffsets(); + if (IntrinsicInst *BaseIntr = dyn_cast(BasePtr)) { + if (BaseIntr->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + CopyGuardII = BaseIntr; + LLVM_DEBUG(llvm::dbgs() << " Replacing Load:" << LI + << "\n" + " Depends on:" + << *CopyGuardII << "\n"); + } + } + } + if (!LI.isSimple() || LI.getType()->isSingleValueType()) { + if (CopyGuardII) { + auto Load = + introduceNoAliasWhenCopyGuardIndicesAreCompatible(&LI, CopyGuardII); + return (Load != &LI); + } return false; + } // We have an aggregate being loaded, split it apart. 
LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); AAMDNodes AATags; LI.getAAMetadata(AATags); LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, - getAdjustedAlignment(&LI, 0, DL), DL); + getAdjustedAlignment(&LI, 0, DL), DL, CopyGuardII); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); LI.replaceAllUsesWith(V); @@ -3434,6 +3744,30 @@ return false; } + // Look through noalias intrinsics + bool visitIntrinsicInst(IntrinsicInst &II) { + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (II.getOperand(0) == *U) { + enqueueUsers(II); + } + return false; + } + if (II.getIntrinsicID() == Intrinsic::side_noalias || + II.getIntrinsicID() == Intrinsic::noalias_decl) { + return false; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG(llvm::dbgs() + << "AggLoadStoreRewriter: handling llvm.noalias.copy.guard:" + << (II.getOperand(0) == *U) << ":" << II << "\n"); + if (II.getOperand(0) == *U) + enqueueUsers(II); + return false; + } + + return Base::visitIntrinsicInst(II); + } + bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { enqueueUsers(ASC); return false; Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -35,6 +34,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -45,6 +45,7 @@ #include "llvm/IR/Type.h" 
#include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -61,40 +62,181 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -bool llvm::isAllocaPromotable(const AllocaInst *AI) { +class PromotableChecker { +public: + bool check(bool c) { return c; } + void trackRemovable(const Instruction *I) {} + void trackOperandToZero(const Instruction *I, int operand) {} + void trackNoAliasDecl(const IntrinsicInst *II) {} +}; + +class PromotableTracker { +public: + bool check(bool c) { + assert(!c && "PromotableTracke::check failed"); + return false; + } + void trackRemovable(Instruction *I) { + // FIXME: Performance Warning: linear search - might become slow (?) + if (std::find(mRemovables.begin(), mRemovables.end(), I) == + mRemovables.end()) + mRemovables.push_back(I); + } + void trackOperandToZero(Instruction *I, int operand) { + mZeroOperands.emplace_back(I, operand); + } + void trackNoAliasDecl(IntrinsicInst *II) { mNoAliasDecls.push_back(II); } + +public: + SmallVector mRemovables; + SmallVector, 4> mZeroOperands; + SmallVector mNoAliasDecls; +}; + +// Return true if the only usage of this pointer is as identifyP argument for +// llvm.noalias or llvm.side.noalias (either direct or recursive) +// Look through bitcast, getelementptr, llvm.noalias, llvm.side.noalias + +template +bool onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(Value *V, PT &pt); + +template +bool isAndOnlyUsedByNoaliasOrSideNoaliasIdentifyPArg(IntrinsicInst *II, + unsigned OpNo, PT &pt) { + if (II->getIntrinsicID() == Intrinsic::side_noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::SideNoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else if (OpNo == Intrinsic::SideNoAliasIdentifyPSideChannelArg) { + 
pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.side.noalias dependency"); + } + return true; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::NoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.side.noalias dependency"); + } + return true; + } + + return false; +} + +template +bool onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(Value *V, PT &pt) { + for (Use &U_ : V->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + if (IntrinsicInst *II = dyn_cast(U)) { + if (isAndOnlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, OpNo, pt)) + continue; + return false; + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(BCI, pt)) + return false; + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(GEPI, pt)) + return false; + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(ASCI, pt)) + return false; + pt.trackRemovable(ASCI); + } else { + return false; + } + } + return true; +} + +template bool trackAllocaPromotable(AllocaInst *AI, PT &pt) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { + for (Use &U_ : AI->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + + if (LoadInst *LI = dyn_cast(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. 
- if (LI->isVolatile()) + if (pt.check(LI->isVolatile())) return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) + if (OpNo == LI->getNoaliasSideChannelOperandIndex()) { + // Load will be removed. Disconnect side_noalias dependency + pt.trackOperandToZero(LI, OpNo); + } + } else if (StoreInst *SI = dyn_cast(U)) { + if (pt.check(OpNo == 0)) return false; // Don't allow a store OF the AI, only INTO the AI. // Note that atomic stores can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (SI->isVolatile()) + if (pt.check(SI->isVolatile())) return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd()) + if (OpNo == SI->getNoaliasSideChannelOperandIndex()) { + // Store will be removed. Disconnect side_noalias dependency + pt.trackOperandToZero(SI, OpNo); + } + } else if (IntrinsicInst *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + pt.trackRemovable(II); + break; + case Intrinsic::noalias_decl: + pt.trackNoAliasDecl(II); + break; + case Intrinsic::noalias: + case Intrinsic::side_noalias: + if (!isAndOnlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, OpNo, pt)) + return false; + break; + default: return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + } + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(BCI, pt)) { + pt.trackRemovable(BCI); + continue; + } + if (pt.check(BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!onlyUsedByLifetimeMarkers(BCI)) + if (pt.check(!onlyUsedByLifetimeMarkers(BCI))) return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + + for (auto *U2 : BCI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(BCI); + } 
else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(GEPI, pt)) { + pt.trackRemovable(GEPI); + continue; + } + if (pt.check(GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!GEPI->hasAllZeroIndices()) + if (pt.check(!GEPI->hasAllZeroIndices())) return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) + if (pt.check(!onlyUsedByLifetimeMarkers(GEPI))) return false; + + for (auto *U2 : GEPI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(GEPI); } else { return false; } @@ -103,6 +245,11 @@ return true; } +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + PromotableChecker pc; + return trackAllocaPromotable(const_cast(AI), pc); +} + namespace { struct AllocaInfo { @@ -312,26 +459,188 @@ AC->registerAssumption(CI); } -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { - // Knowing that this alloca is promotable, we know that it's safe to kill all - // instructions except for load and store. +static void removeIntrinsicUsers(AllocaInst *AI) { + // The AI is going to be deleted. Remove lifetime intrinsic users. + // Also disconnect and remove noalias/side.noalias/noalias_decl intrinsic + // users. - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast(*UI); - ++UI; - if (isa(I) || isa(I)) - continue; + // Track the possible intrinsics. If we do not have a noalias.decl or we do + // not have an unknown function scope, no extra modifications are needed. If + // both are there, we need to propagate the MetadataValue from the declaration + // to those intrinsics that are using the unknown scope. 
+ PromotableTracker pt; + + if (!trackAllocaPromotable(AI, pt)) { + assert(false && "trackAllocaPromotable not consistent"); + } + + // Propagate NoaliasDecl + MDNode *NoAliasUnknownScopeMD = + AI->getParent()->getParent()->getMetadata("noalias"); + Instruction *NoAliasDecl = nullptr; + if (pt.mNoAliasDecls.size() == 1) + NoAliasDecl = pt.mNoAliasDecls[0]; + + if (NoAliasUnknownScopeMD) { + if (NoAliasDecl) { + LLVM_DEBUG(llvm::dbgs() + << "- Propagating " << *NoAliasDecl << " scope to:\n"); + auto NoAliasDeclScope = + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg); + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::side_noalias) { + // If we get here, we can assume the identifyP or its sidechannel + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::SideNoAliasIdentifyPArg || + OpNo == Intrinsic::SideNoAliasIdentifyPSideChannelArg)); + unsigned ScopeArg = + (ID == Intrinsic::noalias ? Intrinsic::NoAliasScopeArg + : Intrinsic::SideNoAliasScopeArg); + unsigned DeclArg = (ID == Intrinsic::noalias + ? 
Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::SideNoAliasNoAliasDeclArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate the declaration scope + // Note: splitting already took care of updating the ObjId + LLVM_DEBUG(llvm::dbgs() << "-- " << *I << "\n"); + II->setOperand(ScopeArg, NoAliasDeclScope); + + // also update the noalias declaration + II->setOperand(DeclArg, NoAliasDecl); + } + } + } + } + } else if (pt.mNoAliasDecls.empty()) { + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::side_noalias) { + // If we get here, we can assume the identifyP or its sidechannel + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::SideNoAliasIdentifyPArg || + OpNo == Intrinsic::SideNoAliasIdentifyPSideChannelArg)); + unsigned ScopeArg = + (ID == Intrinsic::noalias ? Intrinsic::NoAliasScopeArg + : Intrinsic::SideNoAliasScopeArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate a more or less unique id + LLVM_DEBUG(llvm::dbgs() + << "-- No llvm.noalias.decl, looking through: " << *I + << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + } + } + } + } + } + } + + if (NoAliasDecl) { + // Check if we need to split up llvm.noalias.decl with unique ObjId's + // This is needed to differentiate restrict pointers, once the alloca is + // removed. NOTE: we might as well have depended on 'constant propagation of + // null' and work with a 'constant pointer' + // for IdentifyP. Not sure what mechanism would be the best. 
+ const DataLayout &DL = AI->getParent()->getModule()->getDataLayout(); + std::map ObjId2NoAliasDecl; + + auto BaseObjId = cast(NoAliasDecl->getOperand( + Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(); + ObjId2NoAliasDecl[BaseObjId] = NoAliasDecl; + + for (auto PairIO : pt.mZeroOperands) { + IntrinsicInst *II = dyn_cast(PairIO.first); + if (II && ((II->getIntrinsicID() == Intrinsic::noalias) || + (II->getIntrinsicID() == Intrinsic::side_noalias))) { + auto OpNo = PairIO.second; + unsigned IdentifyPArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPArg + : Intrinsic::SideNoAliasIdentifyPArg; + unsigned ObjIdArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPObjIdArg + : Intrinsic::SideNoAliasIdentifyPObjIdArg; + unsigned NoAliasDeclArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::SideNoAliasNoAliasDeclArg; + + if ((unsigned)OpNo != IdentifyPArg) + continue; - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast(*UUI); - ++UUI; - Inst->eraseFromParent(); + auto CurrentObjId = + cast(II->getOperand(ObjIdArg))->getZExtValue(); + + assert(CurrentObjId == BaseObjId && + "Initial object id difference detected."); + + APInt PPointerOffset(DL.getPointerSizeInBits(), 0ull); + assert(AI == II->getOperand(IdentifyPArg) + ->stripAndAccumulateInBoundsConstantOffsets( + DL, PPointerOffset) && + "hmm.. 
expected stripped P to map to alloca"); + if (!PPointerOffset.isNullValue()) { + CurrentObjId += PPointerOffset.getZExtValue(); + auto &NewNoAliasDecl = ObjId2NoAliasDecl[CurrentObjId]; + if (NewNoAliasDecl == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "Creating llvm.noalias.decl for IdentifyPObjId " + << CurrentObjId << "\n"); + IRBuilder NoAliasDeclBuilder(NoAliasDecl); + NewNoAliasDecl = NoAliasDeclBuilder.CreateNoAliasDeclaration( + ConstantPointerNull::get(cast(AI->getType())), + CurrentObjId, + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + LLVM_DEBUG(llvm::dbgs() << "- " << *NewNoAliasDecl << "\n"); + } + II->setOperand(NoAliasDeclArg, NewNoAliasDecl); + II->setOperand(ObjIdArg, + ConstantInt::get(II->getOperand(ObjIdArg)->getType(), + CurrentObjId)); + LLVM_DEBUG(llvm::dbgs() + << "Remapping noalias.decl dependency: " << *II << "\n"); + } } } + } + + // set args to zero + for (auto II : pt.mNoAliasDecls) { + LLVM_DEBUG(llvm::dbgs() << "Zeoring noalias.decl dep: " << *II << "\n"); + assert(II->getIntrinsicID() == Intrinsic::noalias_decl); + II->setOperand(Intrinsic::NoAliasDeclAllocaArg, + ConstantPointerNull::get(cast(AI->getType()))); + } + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + LLVM_DEBUG(llvm::dbgs() + << "Zeroing operand " << OpNo << " of " << *I << "\n"); + I->setOperand(OpNo, ConstantPointerNull::get( + cast(I->getOperand(OpNo)->getType()))); + } + + // remove + for (auto I : pt.mRemovables) { + LLVM_DEBUG(llvm::dbgs() << "Removing " << *I << "\n"); I->eraseFromParent(); } } @@ -357,6 +666,9 @@ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast(*UI++); + // load/store can have a side channel + if ((UI != E) && (*UI == UserInst)) + ++UI; if (UserInst == OnlyStore) continue; LoadInst *LI = cast(UserInst); @@ -467,6 +779,9 @@ // store above them, if any. 
for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast(*UI++); + // load/store can have a side channel + if ((UI != E) && (*UI == LI)) + ++UI; if (!LI) continue; @@ -544,7 +859,7 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. Index: llvm/test/Transforms/SROA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias.ll @@ -0,0 +1,302 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_ri(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca i32*, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* undef, i32** %rp, align 4, !noalias !6 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !noalias !6 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %rp, i32 0, metadata !6) + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* %2, i32** %rp, align 4, !tbaa !2, !noalias !6 + %3 = load i32*, i32** %rp, align 4, !tbaa !2, !noalias !6 + %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %3, i8* %1, i32** %rp, i32 0, metadata !6), !tbaa !2, !noalias !6 + store i32 42, i32* %4, align 4, !tbaa !9, !noalias !6 + %5 = bitcast i32** %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4 + ret void +} + +; CHECK-LABEL: @test_ri( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, 
metadata !2) +; CHECK: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_ra(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca [3 x i32*], align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store [3 x i32*] undef, [3 x i32*]* %rp, align 4, !noalias !11 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !11 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i32([3 x i32*]* %rp, i32 0, metadata !11) + %arrayinit.begin = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store i32* %2, i32** %arrayinit.begin, align 4, !tbaa !2, !noalias !11 + %arrayinit.element = getelementptr inbounds i32*, i32** %arrayinit.begin, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, align 4, !tbaa !2, !noalias !11 + %arrayinit.element1 = getelementptr inbounds i32*, i32** %arrayinit.element, i32 1 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr2 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, align 4, !tbaa !2, !noalias !11 + %arrayidx = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %5 = load i32*, i32** %arrayidx, align 4, !tbaa !2, !noalias !11 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %5, i8* %1, i32** %arrayidx, i32 0, metadata !11), !tbaa !2, !noalias !11 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !11 + %arrayidx3 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %7 = load i32*, i32** %arrayidx3, align 4, !tbaa !2, !noalias !11 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %7, i8* %1, i32** 
%arrayidx3, i32 0, metadata !11), !tbaa !2, !noalias !11 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !11 + %arrayidx4 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %9 = load i32*, i32** %arrayidx4, align 4, !tbaa !2, !noalias !11 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %9, i8* %1, i32** %arrayidx4, i32 0, metadata !11), !tbaa !2, !noalias !11 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !11 + %11 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_ra( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !11) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 16, metadata !11) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr, i8* %1, i32** null, i32 8, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr2, i8* %2, i32** null, i32 16, metadata !11), !tbaa !5, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i32([3 x i32*]*, i32, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_rs(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %foo = alloca %struct.FOO, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store %struct.FOO undef, %struct.FOO* %foo, align 4, !noalias !14 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !14 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %foo, i32 0, metadata !14) + %mP0 = getelementptr inbounds %struct.FOO, %struct.FOO* 
%foo, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store i32* %2, i32** %mP0, align 4, !tbaa !17, !noalias !14 + %mP1 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %mP1, align 4, !tbaa !19, !noalias !14 + %mP2 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr1 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr1, i32** %mP2, align 4, !tbaa !20, !noalias !14 + %mP02 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %5 = load i32*, i32** %mP02, align 4, !tbaa !17, !noalias !14 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %5, i8* %1, i32** %mP02, i32 0, metadata !14), !tbaa !17, !noalias !14 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !14 + %mP13 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %7 = load i32*, i32** %mP13, align 4, !tbaa !19, !noalias !14 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %7, i8* %1, i32** %mP13, i32 0, metadata !14), !tbaa !19, !noalias !14 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !14 + %mP24 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %9 = load i32*, i32** %mP24, align 4, !tbaa !20, !noalias !14 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %9, i8* %1, i32** %mP24, i32 0, metadata !14), !tbaa !20, !noalias !14 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !14 + %11 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_rs( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, 
i32 8, metadata !14) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 16, metadata !14) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32 0, metadata !14), !tbaa !17, !noalias !14 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr, i8* %1, i32** null, i32 8, metadata !14), !tbaa !19, !noalias !14 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr1, i8* %2, i32** null, i32 16, metadata !14), !tbaa !20, !noalias !14 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ri_inlined(i32* %_p) local_unnamed_addr #2 !noalias !21 { +entry: + %rp = alloca i32*, align 4 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4, !noalias !24 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** nonnull %rp, i32 0, metadata !27), !noalias !21 + store i32* %_p, i32** %rp, noalias_sidechannel i32** undef, align 4, !noalias !24 + %2 = load i32*, i32** %rp, noalias_sidechannel i32** undef, align 4, !noalias !28 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** %rp, i32** undef, i32 0, metadata !27) #4, !noalias !28 + store i32 42, i32* %2, noalias_sidechannel i32* %3, align 4, !noalias !28 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4, !noalias !21 + ret void +} + +; CHECK-LABEL: @test_ri_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !24) +; CHECK: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !24){{.*}}, !noalias !27 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ra_inlined(i32* %_p) local_unnamed_addr #2 !noalias !29 { +entry: + %rp = alloca [3 x i32*], align 4 + %.fca.0.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %0 = 
bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !32 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i32([3 x i32*]* nonnull %rp, i32 0, metadata !35), !noalias !29 + store i32* %_p, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !noalias !32 + %arrayinit.element = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, noalias_sidechannel i32** undef, align 4, !noalias !32 + %arrayinit.element1 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %add.ptr2 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, noalias_sidechannel i32** undef, align 4, !noalias !32 + %2 = load i32*, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !noalias !36 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** %.fca.0.gep, i32** undef, i32 0, metadata !35) #4, !noalias !36 + store i32 42, i32* %2, noalias_sidechannel i32* %3, align 4, !noalias !36 + %arrayidx1.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %4 = load i32*, i32** %arrayidx1.i, noalias_sidechannel i32** undef, align 4, !noalias !36 + %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %4, i8* %1, i32** nonnull %arrayidx1.i, i32** undef, i32 0, metadata !35) #4, !noalias !36 + store i32 43, i32* %4, noalias_sidechannel i32* %5, align 4, !noalias !36 + %arrayidx2.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 2 + %6 = load i32*, i32** %arrayidx2.i, noalias_sidechannel i32** undef, align 4, !noalias !36 + %7 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %6, i8* %1, i32** nonnull %arrayidx2.i, i32** undef, i32 0, metadata !35) #4, !noalias !36 + store i32 44, i32* %6, noalias_sidechannel i32* %7, align 4, !noalias !36 + call void @llvm.lifetime.end.p0i8(i64 12, i8* 
nonnull %0) #4, !noalias !29 + ret void +} + +; CHECK-LABEL: @test_ra_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !31) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !31) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 16, metadata !31) +; CHECK: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !31){{.*}}, !noalias !34 +; CHECK: %4 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i32 8, metadata !31){{.*}}, !noalias !34 +; CHECK: %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %add.ptr2, i8* %2, i32** nonnull null, i32** undef, i32 16, metadata !31){{.*}}, !noalias !34 + + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_rs_inlined(i32* %_p) local_unnamed_addr #2 !noalias !37 { +entry: + %foo = alloca %struct.FOO, align 4 + %.fca.0.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %.fca.2.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !40 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* nonnull %foo, i32 0, metadata !43), !noalias !37 + store i32* %_p, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !noalias !40 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %.fca.1.gep, noalias_sidechannel i32** undef, align 4, !noalias !40 + %add.ptr1 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr1, i32** %.fca.2.gep, noalias_sidechannel i32** undef, align 4, !noalias !40 + %mP0.i = getelementptr 
inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %mP0.i, noalias_sidechannel i32** undef, align 4, !noalias !44 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** %mP0.i, i32** undef, i32 0, metadata !43) #4, !noalias !44 + store i32 42, i32* %2, noalias_sidechannel i32* %3, align 4, !noalias !44 + %mP1.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %4 = load i32*, i32** %mP1.i, noalias_sidechannel i32** undef, align 4, !noalias !44 + %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %4, i8* %1, i32** nonnull %mP1.i, i32** undef, i32 0, metadata !43) #4, !noalias !44 + store i32 43, i32* %4, noalias_sidechannel i32* %5, align 4, !noalias !44 + %mP2.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %6 = load i32*, i32** %mP2.i, noalias_sidechannel i32** undef, align 4, !noalias !44 + %7 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %6, i8* %1, i32** nonnull %mP2.i, i32** undef, i32 0, metadata !43) #4, !noalias !44 + store i32 44, i32* %6, noalias_sidechannel i32* %7, align 4, !noalias !44 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !37 + ret void +} + +; CHECK-LABEL: @test_rs_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !38) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !38) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 16, metadata !38) +; CHECK: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !38){{.*}}, !noalias !41 +; CHECK: %4 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i32 8, metadata !38){{.*}}, !noalias !41 +; CHECK: %5 = call i32* 
@llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %add.ptr1, i8* %2, i32** nonnull null, i32** undef, i32 16, metadata !38){{.*}}, !noalias !41 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO*, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"any pointer"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_ri: rp"} +!8 = distinct !{!8, !"test_ri"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!4, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_ra: rp"} +!13 = 
distinct !{!13, !"test_ra"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rs: foo"} +!16 = distinct !{!16, !"test_rs"} +!17 = !{!18, !3, i64 0, i64 4} +!18 = !{!4, i64 12, !"FOO", !3, i64 0, i64 4, !3, i64 4, i64 4, !3, i64 8, i64 4} +!19 = !{!18, !3, i64 4, i64 4} +!20 = !{!18, !3, i64 8, i64 4} +!21 = !{!22} +!22 = distinct !{!22, !23, !"test_ri_inlined: unknown scope"} +!23 = distinct !{!23, !"test_ri_inlined"} +!24 = !{!25, !22} +!25 = distinct !{!25, !26, !"test_ri_inlined: rp"} +!26 = distinct !{!26, !"test_ri_inlined"} +!27 = !{!25} +!28 = !{!22, !25, !22} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test_ra_inlined: unknown scope"} +!31 = distinct !{!31, !"test_ra_inlined"} +!32 = !{!33, !30} +!33 = distinct !{!33, !34, !"test_ra_inlined: rp"} +!34 = distinct !{!34, !"test_ra_inlined"} +!35 = !{!33} +!36 = !{!30, !33, !30} +!37 = !{!38} +!38 = distinct !{!38, !39, !"test_rs_inlined: unknown scope"} +!39 = distinct !{!39, !"test_rs_inlined"} +!40 = !{!41, !38} +!41 = distinct !{!41, !42, !"test_rs_inlined: foo"} +!42 = distinct !{!42, !"test_rs_inlined"} +!43 = !{!41} +!44 = !{!38, !41, !38}