diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -25,7 +25,8 @@
 /// Return true if this is always a dereferenceable pointer. If the context
 /// instruction is specified perform context-sensitive analysis and return true
 /// if the pointer is dereferenceable at the specified instruction.
-bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool isDereferenceablePointer(const Value *V, Type *Ty,
+                              const DataLayout &DL,
                               const Instruction *CtxI = nullptr,
                               const DominatorTree *DT = nullptr);
 
@@ -33,8 +34,8 @@
 /// greater or equal than requested. If the context instruction is specified
 /// performs context-sensitive analysis and returns true if the pointer is
 /// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
-                                        const DataLayout &DL,
+bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+                                        unsigned Align, const DataLayout &DL,
                                         const Instruction *CtxI = nullptr,
                                         const DominatorTree *DT = nullptr);
 
@@ -55,7 +56,20 @@
 /// If it is not obviously safe to load from the specified pointer, we do a
 /// quick local scan of the basic block containing ScanFrom, to determine if
 /// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+                                 const DataLayout &DL,
+                                 Instruction *ScanFrom = nullptr,
+                                 const DominatorTree *DT = nullptr);
+
+/// Return true if we know that executing a load from this value cannot trap.
+///
+/// If DT and ScanFrom are specified this method performs context-sensitive
+/// analysis and returns true if it is safe to load immediately before ScanFrom.
+///
+/// If it is not obviously safe to load from the specified pointer, we do a
+/// quick local scan of the basic block containing ScanFrom, to determine if
+/// the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
                                  const DataLayout &DL,
                                  Instruction *ScanFrom = nullptr,
                                  const DominatorTree *DT = nullptr);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -125,7 +125,8 @@
                                               Visited);
 }
 
-bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+                                              unsigned Align,
                                               const DataLayout &DL,
                                               const Instruction *CtxI,
                                               const DominatorTree *DT) {
@@ -133,8 +134,6 @@
   // attribute, we know exactly how many bytes are dereferenceable. If we can
   // determine the exact offset to the attributed variable, we can use that
   // information here.
-  Type *VTy = V->getType();
-  Type *Ty = VTy->getPointerElementType();
 
   // Require ABI alignment for loads without alignment specification
   if (Align == 0)
@@ -145,14 +144,16 @@
 
   SmallPtrSet<const Value *, 32> Visited;
   return ::isDereferenceableAndAlignedPointer(
-      V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
-      CtxI, DT, Visited);
+      V, Align,
+      APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)),
+      DL, CtxI, DT, Visited);
 }
 
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
+                                    const DataLayout &DL,
                                     const Instruction *CtxI,
                                     const DominatorTree *DT) {
-  return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT);
+  return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT);
 }
 
 /// Test if A and B will obviously have the same value.
@@ -197,7 +198,7 @@
 ///
 /// This uses the pointee type to determine how many bytes need to be safe to
 /// load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
                                        const DataLayout &DL,
                                        Instruction *ScanFrom,
                                        const DominatorTree *DT) {
@@ -208,7 +209,7 @@
 
   // If DT is not specified we can't make context-sensitive query
   const Instruction* CtxI = DT ? ScanFrom : nullptr;
-  if (isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT))
+  if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT))
     return true;
 
   int64_t ByteOffset = 0;
@@ -313,7 +314,15 @@
   return false;
 }
 
-/// DefMaxInstsToScan - the default number of maximum instructions
+bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+                                       const DataLayout &DL,
+                                       Instruction *ScanFrom,
+                                       const DominatorTree *DT) {
+  APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
+  return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT);
+}
+
+/// DefMaxInstsToScan - the default number of maximum instructions
 /// to scan in the block, used by FindAvailableLoadedValue().
 /// FindAvailableLoadedValue() was introduced in r60148, to improve jump
 /// threading in part by eliminating partially redundant loads.
diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp
--- a/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -53,9 +53,10 @@
   for (auto &I: instructions(F)) {
     if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
       Value *PO = LI->getPointerOperand();
-      if (isDereferenceablePointer(PO, DL))
+      if (isDereferenceablePointer(PO, LI->getType(), DL))
         Deref.push_back(PO);
-      if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+      if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
+                                             LI->getAlignment(), DL))
         DerefAndAligned.insert(PO);
     }
   }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3949,7 +3949,8 @@
       return false;
     const DataLayout &DL = LI->getModule()->getDataLayout();
     return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
-                                              LI->getAlignment(), DL, CtxI, DT);
+                                              LI->getType(), LI->getAlignment(),
+                                              DL, CtxI, DT);
   }
   case Instruction::Call: {
     auto *CI = cast<const CallInst>(Inst);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4004,7 +4004,8 @@
   bool isVolatile = I.isVolatile();
   bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
   bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
-  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
+  bool isDereferenceable =
+      isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
   unsigned Alignment = I.getAlignment();
 
   AAMDNodes AAInfo;
@@ -4629,7 +4630,8 @@
     Flags |= MachineMemOperand::MOVolatile;
   if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
     Flags |= MachineMemOperand::MOInvariant;
-  if (isDereferenceablePointer(I.getPointerOperand(), DAG.getDataLayout()))
+  if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
+                               DAG.getDataLayout()))
     Flags |= MachineMemOperand::MODereferenceable;
   Flags |= TLI.getMMOFlags(I);
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -479,9 +479,9 @@
   return NF;
 }
 
-/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
-/// all callees pass in a valid pointer for the specified function argument.
-static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
+/// Return true if we can prove that all callers pass in a valid pointer for
+/// the specified function argument.
+static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
   Function *Callee = Arg->getParent();
   const DataLayout &DL = Callee->getParent()->getDataLayout();
 
@@ -493,7 +493,7 @@
     CallSite CS(U);
     assert(CS && "Should only have direct calls!");
 
-    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+    if (!isDereferenceablePointer(CS.getArgument(ArgNo), Ty, DL))
       return false;
   }
   return true;
@@ -566,7 +566,7 @@
 /// This method limits promotion of aggregates to only promote up to three
 /// elements of the aggregate in order to avoid exploding the number of
 /// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
+static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
                                     unsigned MaxElements) {
   using GEPIndicesSet = std::set<IndicesVector>;
 
@@ -596,8 +596,29 @@
   GEPIndicesSet ToPromote;
 
   // If the pointer is always valid, any load with first index 0 is valid.
-  if (isByVal || allCallersPassInValidPointerForArgument(Arg))
+  Type *BaseTy = nullptr;
+  if (ByValTy) {
     SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+    BaseTy = ByValTy;
+  }
+
+  // Whenever a new underlying type for the operand is found, make sure it's
+  // consistent with the GEPs and loads we've already seen and, if necessary,
+  // use it to see if all incoming pointers are valid (which implies the 0-index
+  // is safe).
+  auto UpdateBaseTy = [&](Type *NewBaseTy) {
+    if (BaseTy && NewBaseTy != BaseTy)
+      return false;
+    else if (!BaseTy) {
+      BaseTy = NewBaseTy;
+      if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
+        assert(SafeToUnconditionallyLoad.empty());
+        SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+      }
+    }
+
+    return true;
+  };
 
   // First, iterate the entry block and mark loads of (geps of) arguments as
   // safe.
@@ -621,6 +642,9 @@
               // right away, can't promote this argument at all.
               return false;
 
+          if (!UpdateBaseTy(GEP->getSourceElementType()))
+            return false;
+
           // Indices checked out, mark them as safe
           markIndicesSafe(Indices, SafeToUnconditionallyLoad);
           Indices.clear();
@@ -628,6 +652,11 @@
       } else if (V == Arg) {
         // Direct loads are equivalent to a GEP with a single 0 index.
         markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+
+        if (BaseTy && LI->getType() != BaseTy)
+          return false;
+
+        BaseTy = LI->getType();
       }
     }
 
@@ -645,6 +674,9 @@
       Loads.push_back(LI);
       // Direct loads are equivalent to a GEP with a zero index and then a load.
       Operands.push_back(0);
+
+      if (!UpdateBaseTy(LI->getType()))
+        return false;
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
       if (GEP->use_empty()) {
         // Dead GEP's cause trouble later. Just remove them if we run into
@@ -653,9 +685,12 @@
         // TODO: This runs the above loop over and over again for dead GEPs
         // Couldn't we just do increment the UI iterator earlier and erase the
         // use?
-        return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements);
+        return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements);
       }
 
+      if (!UpdateBaseTy(GEP->getSourceElementType()))
+        return false;
+
       // Ensure that all of the indices are constants.
       for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
            ++i)
@@ -966,8 +1001,9 @@
     }
 
     // Otherwise, see if we can promote the pointer to its value.
-    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR,
-                                MaxElements))
+    Type *ByValTy =
+        PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
+    if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
       ArgsToPromote.insert(PtrArg);
   }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1060,7 +1060,7 @@
 
   // If we can unconditionally load from this address, replace with a
   // load/select idiom. TODO: use DT for context sensitive query
-  if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+  if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
                                          II.getModule()->getDataLayout(),
                                          &II, nullptr)) {
     Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1064,8 +1064,10 @@
   if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
     // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
     unsigned Align = LI.getAlignment();
-    if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
-        isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
+    if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
+                                    DL, SI) &&
+        isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
+                                    DL, SI)) {
       LoadInst *V1 =
           Builder.CreateLoad(LI.getType(), SI->getOperand(1),
                              SI->getOperand(1)->getName() + ".val");
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2016,8 +2016,8 @@
         // deref info through it.
         if (!DereferenceableInPH) {
           DereferenceableInPH = isDereferenceableAndAlignedPointer(
-              Store->getPointerOperand(), Store->getAlignment(), MDL,
-              Preheader->getTerminator(), DT);
+              Store->getPointerOperand(), Store->getValueOperand()->getType(),
+              Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
         }
       } else
         return false; // Not a load or store.
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -162,7 +162,7 @@
     return {};
   }
   const auto &DL = GEP->getModule()->getDataLayout();
-  if (!isDereferenceablePointer(GEP, DL)) {
+  if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) {
     LLVM_DEBUG(dbgs() << "not dereferenceable\n");
     // We need to make sure that we can do comparison in any order, so we
     // require memory to be unconditionnally dereferencable.
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1190,12 +1190,16 @@
 /// FIXME: This should be hoisted into a generic utility, likely in
 /// Transforms/Util/Local.h
 static bool isSafePHIToSpeculate(PHINode &PN) {
+  const DataLayout &DL = PN.getModule()->getDataLayout();
+
   // For now, we can only do this promotion if the load is in the same block
   // as the PHI, and if there are no stores between the phi and load.
   // TODO: Allow recursive phi users.
   // TODO: Allow stores.
   BasicBlock *BB = PN.getParent();
   unsigned MaxAlign = 0;
+  uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
+  APInt MaxSize(APWidth, 0);
   bool HaveLoad = false;
   for (User *U : PN.users()) {
     LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1214,15 +1218,15 @@
     if (BBI->mayWriteToMemory())
       return false;
 
+    uint64_t Size = DL.getTypeStoreSize(LI->getType());
     MaxAlign = std::max(MaxAlign, LI->getAlignment());
+    MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
     HaveLoad = true;
   }
 
   if (!HaveLoad)
     return false;
 
-  const DataLayout &DL = PN.getModule()->getDataLayout();
-
   // We can only transform this if it is safe to push the loads into the
   // predecessor blocks. The only thing to watch out for is that we can't put
   // a possibly trapping load in the predecessor if it is a critical edge.
@@ -1244,7 +1248,7 @@
     // If this pointer is always safe to load, or if we can prove that there
     // is already a load in the block, then we can move the load to the pred
     // block.
-    if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
+    if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
       continue;
 
     return false;
@@ -1334,9 +1338,11 @@
     // Both operands to the select need to be dereferenceable, either
     // absolutely (e.g. allocas) or at this point because we can see other
     // accesses to it.
-    if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
+    if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(),
+                                     DL, LI))
       return false;
-    if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
+    if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(),
+                                     DL, LI))
       return false;
   }
 
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -340,7 +340,7 @@
     // being loaded from.
     const DataLayout &DL = L->getModule()->getDataLayout();
     if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
-        !isSafeToLoadUnconditionally(L->getPointerOperand(),
+        !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
                                      L->getAlignment(), DL, L))
       return false;
   }