Index: include/llvm/Analysis/Loads.h
===================================================================
--- include/llvm/Analysis/Loads.h
+++ include/llvm/Analysis/Loads.h
@@ -20,14 +20,19 @@
 class AliasAnalysis;
 class DataLayout;
+class DominatorTree;
 class MDNode;
+class TargetLibraryInfo;
 
 /// isSafeToLoadUnconditionally - Return true if we know that executing a load
 /// from this value cannot trap. If it is not obviously safe to load from the
 /// specified pointer, we do a quick local scan of the basic block containing
 /// ScanFrom, to determine if the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
-                                 unsigned Align);
+bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
+                                 Instruction *ScanFrom,
+                                 const DataLayout &DL,
+                                 const DominatorTree *DT = nullptr,
+                                 const TargetLibraryInfo *TLI = nullptr);
 
 /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
 /// the instruction before ScanFrom) checking to see if we have the value at
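The reshuffled signature puts the alignment next to the pointer it qualifies, makes the DataLayout an explicit parameter (so the routine no longer has to reach through ScanFrom, which may now be null), and threads an optional DominatorTree and TargetLibraryInfo into the dereferenceability check. A minimal caller-side sketch under the new signature; the helper name canHoistLoad is hypothetical, everything else mirrors the header above:

  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Hypothetical guard for hoisting a load: DT and TLI may be null; passing
  // them only adds precision, never changes soundness.
  static bool canHoistLoad(LoadInst *LI, const DominatorTree *DT,
                           const TargetLibraryInfo *TLI) {
    const DataLayout &DL = LI->getModule()->getDataLayout();
    return isSafeToLoadUnconditionally(LI->getPointerOperand(),
                                       LI->getAlignment(),
                                       /*ScanFrom=*/LI, DL, DT, TLI);
  }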
Index: lib/Analysis/Loads.cpp
===================================================================
--- lib/Analysis/Loads.cpp
+++ lib/Analysis/Loads.cpp
@@ -13,8 +13,10 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -62,57 +64,23 @@
 ///
 /// This uses the pointee type to determine how many bytes need to be safe to
 /// load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
-                                       unsigned Align) {
-  const DataLayout &DL = ScanFrom->getModule()->getDataLayout();
-
+bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
+                                       Instruction *ScanFrom,
+                                       const DataLayout &DL,
+                                       const DominatorTree *DT,
+                                       const TargetLibraryInfo *TLI) {
   // Zero alignment means that the load has the ABI alignment for the target
   if (Align == 0)
     Align = DL.getABITypeAlignment(V->getType()->getPointerElementType());
   assert(isPowerOf2_32(Align));
 
-  int64_t ByteOffset = 0;
-  Value *Base = V;
-  Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
+  if (isDereferenceableAndAlignedPointer(V, Align, DL, DT ? ScanFrom : nullptr,
+                                         DT, TLI))
+    return true;
 
-  if (ByteOffset < 0) // out of bounds
+  if (!ScanFrom)
     return false;
 
-  Type *BaseType = nullptr;
-  unsigned BaseAlign = 0;
-  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
-    // An alloca is safe to load from as load as it is suitably aligned.
-    BaseType = AI->getAllocatedType();
-    BaseAlign = AI->getAlignment();
-  } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
-    // Global variables are not necessarily safe to load from if they are
-    // overridden. Their size may change or they may be weak and require a test
-    // to determine if they were in fact provided.
-    if (!GV->mayBeOverridden()) {
-      BaseType = GV->getType()->getElementType();
-      BaseAlign = GV->getAlignment();
-    }
-  }
-
-  PointerType *AddrTy = cast<PointerType>(V->getType());
-  uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
-
-  // If we found a base allocated type from either an alloca or global variable,
-  // try to see if we are definitively within the allocated region. We need to
-  // know the size of the base type and the loaded type to do anything in this
-  // case.
-  if (BaseType && BaseType->isSized()) {
-    if (BaseAlign == 0)
-      BaseAlign = DL.getPrefTypeAlignment(BaseType);
-
-    if (Align <= BaseAlign) {
-      // Check if the load is within the bounds of the underlying object.
-      if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
-          ((ByteOffset % Align) == 0))
-        return true;
-    }
-  }
-
   // Otherwise, be a little bit aggressive by scanning the local block where we
   // want to check to see if the pointer is already being loaded or stored
   // from/to. If so, the previous load or store would have already trapped,
@@ -120,9 +88,9 @@
   // the load entirely).
   BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
 
-  // We can at least always strip pointer casts even though we can't use the
-  // base here.
   V = V->stripPointerCasts();
+  PointerType *AddrTy = cast<PointerType>(V->getType());
+  uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
 
   while (BBI != E) {
     --BBI;
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
@@ -3143,8 +3144,10 @@
         LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
       return false;
     const DataLayout &DL = LI->getModule()->getDataLayout();
-    return isDereferenceableAndAlignedPointer(
-        LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI);
+    return
+        isSafeToLoadUnconditionally(const_cast<Value *>(LI->getPointerOperand()),
+                                    LI->getAlignment(),
+                                    const_cast<Instruction *>(CtxI), DL, DT, TLI);
   }
   case Instruction::Call: {
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
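The rewritten body is two phases: a context-sensitive isDereferenceableAndAlignedPointer query (which subsumes the deleted alloca/global special cases and also understands dereferenceable attributes), then the existing local block scan as a fallback. ScanFrom is forwarded as the context instruction only when a DominatorTree is available, and a null ScanFrom now simply skips the scan. One consequence worth showing: the function can now answer a pure dereferenceability question with no program point at all. A hedged sketch; isAlwaysLoadable is a hypothetical wrapper, not part of the patch:

  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/DataLayout.h"
  using namespace llvm;

  // With ScanFrom == nullptr the call reduces to the
  // isDereferenceableAndAlignedPointer check, so it succeeds only for
  // pointers such as allocas, non-overridable globals, or arguments
  // carrying a sufficient dereferenceable(N) attribute.
  static bool isAlwaysLoadable(Value *Ptr, unsigned Align,
                               const DataLayout &DL,
                               const DominatorTree *DT = nullptr,
                               const TargetLibraryInfo *TLI = nullptr) {
    return isSafeToLoadUnconditionally(Ptr, Align, /*ScanFrom=*/nullptr, DL,
                                       DT, TLI);
  }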
Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -810,8 +810,8 @@
   if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
     // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
     unsigned Align = LI.getAlignment();
-    if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
-        isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
+    if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, SI, DL) &&
+        isSafeToLoadUnconditionally(SI->getOperand(2), Align, SI, DL)) {
       LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                          SI->getOperand(1)->getName()+".val");
       LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -1295,8 +1295,7 @@
     // If this pointer is always safe to load, or if we can prove that there
     // is already a load in the block, then we can move the load to the pred
     // block.
-    if (isDereferenceablePointer(InVal, DL) ||
-        isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
+    if (isSafeToLoadUnconditionally(InVal, MaxAlign, TI, DL))
       continue;
 
     return false;
@@ -1365,8 +1364,6 @@
   Value *TValue = SI.getTrueValue();
   Value *FValue = SI.getFalseValue();
   const DataLayout &DL = SI.getModule()->getDataLayout();
-  bool TDerefable = isDereferenceablePointer(TValue, DL);
-  bool FDerefable = isDereferenceablePointer(FValue, DL);
 
   for (User *U : SI.users()) {
     LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1376,11 +1373,9 @@
     // Both operands to the select need to be dereferencable, either
     // absolutely (e.g. allocas) or at this point because we can see other
     // accesses to it.
-    if (!TDerefable &&
-        !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
+    if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), LI, DL))
       return false;
-    if (!FDerefable &&
-        !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
+    if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), LI, DL))
       return false;
   }
 
Index: lib/Transforms/Scalar/ScalarReplAggregates.cpp
===================================================================
--- lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1140,8 +1140,6 @@
 /// the select can be loaded unconditionally.
 static bool isSafeSelectToSpeculate(SelectInst *SI) {
   const DataLayout &DL = SI->getModule()->getDataLayout();
-  bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
-  bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);
 
   for (User *U : SI->users()) {
     LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1149,13 +1147,11 @@
 
     // Both operands to the select need to be dereferencable, either absolutely
     // (e.g. allocas) or at this point because we can see other accesses to it.
-    if (!TDerefable &&
-        !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
-                                     LI->getAlignment()))
+    if (!isSafeToLoadUnconditionally(SI->getTrueValue(),
+                                     LI->getAlignment(), LI, DL))
       return false;
-    if (!FDerefable &&
-        !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
-                                     LI->getAlignment()))
+    if (!isSafeToLoadUnconditionally(SI->getFalseValue(),
+                                     LI->getAlignment(), LI, DL))
       return false;
   }
 
@@ -1228,8 +1224,7 @@
 
     // If this pointer is always safe to load, or if we can prove that there is
     // already a load in the block, then we can move the load to the pred block.
-    if (isDereferenceablePointer(InVal, DL) ||
-        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
+    if (isSafeToLoadUnconditionally(InVal, MaxAlign, Pred->getTerminator(), DL))
       continue;
 
     return false;
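All three transforms previously paired a standalone isDereferenceablePointer check with the block scan; the combined entry point now performs the dereferenceability check itself, and it also verifies alignment, which the old isDereferenceablePointer shortcut did not. When the guard passes, InstCombine performs the select rewrite; a condensed sketch of the surrounding visitLoadInst logic, with Builder, LI, SI and DL as in the file above:

  // load (select Cond, &V1, &V2) --> select Cond, (load &V1), (load &V2).
  // Guard: both arms must be unconditionally loadable at LI's alignment.
  unsigned Align = LI.getAlignment();
  if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, SI, DL) &&
      isSafeToLoadUnconditionally(SI->getOperand(2), Align, SI, DL)) {
    LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                       SI->getOperand(1)->getName() + ".val");
    LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                       SI->getOperand(2)->getName() + ".val");
    return SelectInst::Create(SI->getCondition(), V1, V2);
  }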
Index: lib/Transforms/Scalar/TailRecursionElimination.cpp
===================================================================
--- lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -452,9 +452,10 @@
     // does not write to memory and the load provably won't trap.
     // FIXME: Writes to memory only matter if they may alias the pointer
     // being loaded from.
+    const DataLayout &DL = L->getModule()->getDataLayout();
     if (CI->mayWriteToMemory() ||
-        !isSafeToLoadUnconditionally(L->getPointerOperand(), L,
-                                     L->getAlignment()))
+        !isSafeToLoadUnconditionally(L->getPointerOperand(),
+                                     L->getAlignment(), L, DL))
       return false;
   }
 }
Index: test/Transforms/InstCombine/select.ll
===================================================================
--- test/Transforms/InstCombine/select.ll
+++ test/Transforms/InstCombine/select.ll
@@ -1296,6 +1296,20 @@
   ret i32 %v
 }
 
+define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) %x, i32* dereferenceable(4) %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78_deref(
+; CHECK: %[[V1:.*]] = load i32, i32* %x
+; CHECK-NEXT: %[[V2:.*]] = load i32, i32* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT: ret i32 %[[S]]
+entry:
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32, i32* %p
+  ret i32 %v
+}
+
 define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) {
 ; The same as @test78 but we can't speculate the load because it can trap
 ; if under-aligned.
Index: test/Transforms/TailCallElim/reorder_load.ll
===================================================================
--- test/Transforms/TailCallElim/reorder_load.ll
+++ test/Transforms/TailCallElim/reorder_load.ll
@@ -122,3 +122,26 @@
   %tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
   ret i32 %tmp10
 }
+
+; This load can be moved above the call because the function won't write to it
+; and %a_arg is dereferenceable.
+define fastcc i32 @raise_load_5(i32* dereferenceable(4) %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_5(
+; CHECK-NOT: call
+; CHECK: load i32, i32*
+; CHECK-NOT: call
+; CHECK: }
+entry:
+  %tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
+  br i1 %tmp2, label %if, label %else
+
+if:		; preds = %entry
+  ret i32 0
+
+else:		; preds = %entry
+  %tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+  %tmp8 = call fastcc i32 @raise_load_5(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
+  %tmp9 = load i32, i32* %a_arg		; <i32> [#uses=1]
+  %tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
+  ret i32 %tmp10
+}
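Both new tests drive the attribute path: dereferenceable(4) on an i32* argument covers exactly the 4-byte load, so InstCombine can speculate both select arms and TailCallElim can move the load above the recursive call even though no earlier access in the block proves safety. For producers of such IR, a hedged sketch of attaching the attribute from C++; the helper is hypothetical and uses the AttributeSet-based API of this era (index 0 is the return value, so argument N uses index N + 1; later releases changed these interfaces):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"
  using namespace llvm;

  // Hypothetical helper: mark the ArgNo'th parameter of F as
  // dereferenceable(Bytes), enabling the transforms exercised above.
  static void markDereferenceable(Function &F, unsigned ArgNo, uint64_t Bytes) {
    AttrBuilder B;
    B.addDereferenceableAttr(Bytes);
    F.addAttributes(ArgNo + 1, AttributeSet::get(F.getContext(), ArgNo + 1, B));
  }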