Index: llvm/trunk/lib/Analysis/Loads.cpp
===================================================================
--- llvm/trunk/lib/Analysis/Loads.cpp
+++ llvm/trunk/lib/Analysis/Loads.cpp
@@ -25,39 +25,23 @@
 using namespace llvm;
 
-static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset,
-                                           Type *Ty, const DataLayout &DL,
+static bool isDereferenceableFromAttribute(const Value *BV, APInt Size,
+                                           const DataLayout &DL,
                                            const Instruction *CtxI,
                                            const DominatorTree *DT,
                                            const TargetLibraryInfo *TLI) {
-  assert(Offset.isNonNegative() && "offset can't be negative");
-  assert(Ty->isSized() && "must be sized");
-
   bool CheckForNonNull = false;
-  APInt DerefBytes(Offset.getBitWidth(),
+  APInt DerefBytes(Size.getBitWidth(),
                    BV->getPointerDereferenceableBytes(CheckForNonNull));
   if (DerefBytes.getBoolValue())
-    if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
+    if (DerefBytes.uge(Size))
       if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI))
         return true;
 
   return false;
 }
 
-static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
-                                           const Instruction *CtxI,
-                                           const DominatorTree *DT,
-                                           const TargetLibraryInfo *TLI) {
-  Type *VTy = V->getType();
-  Type *Ty = VTy->getPointerElementType();
-  if (!Ty->isSized())
-    return false;
-
-  APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
-  return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
-}
-
 static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
                       const DataLayout &DL) {
   APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL));
@@ -85,7 +69,7 @@
 /// Test if V is always a pointer to allocated and suitably aligned memory for
 /// a simple load or store.
 static bool isDereferenceableAndAlignedPointer(
-    const Value *V, unsigned Align, const DataLayout &DL,
+    const Value *V, unsigned Align, APInt Size, const DataLayout &DL,
     const Instruction *CtxI, const DominatorTree *DT,
     const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) {
   // Note that it is not safe to speculate into a malloc'd region because
@@ -93,65 +77,50 @@
 
   bool CheckForNonNull;
   if (V->isPointerDereferenceable(CheckForNonNull)) {
-    if (CheckForNonNull && !isKnownNonNullAt(V, CtxI, DT, TLI))
-      return false;
-    return isAligned(V, Align, DL);
+    Type *ETy = V->getType()->getPointerElementType();
+    if (ETy->isSized() && Size.ule(DL.getTypeStoreSize(ETy))) {
+      if (CheckForNonNull && !isKnownNonNullAt(V, CtxI, DT, TLI))
+        return false;
+      return isAligned(V, Align, DL);
+    }
   }
 
-  // It's not always safe to follow a bitcast, for example:
-  //   bitcast i8* (alloca i8) to i32*
-  // would result in a 4-byte load from a 1-byte alloca. However,
-  // if we're casting from a pointer from a type of larger size
-  // to a type of smaller size (or the same size), and the alignment
-  // is at least as large as for the resulting pointer type, then
-  // we can look through the bitcast.
-  if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
-    Type *STy = BC->getSrcTy()->getPointerElementType(),
-         *DTy = BC->getDestTy()->getPointerElementType();
-    if (STy->isSized() && DTy->isSized() &&
-        (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
-        (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
-      return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL,
-                                                CtxI, DT, TLI, Visited);
-  }
+  // bitcast instructions are no-ops as far as dereferenceability is concerned.
+  if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
+    return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size,
+                                              DL, CtxI, DT, TLI, Visited);
 
-  if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
+  if (isDereferenceableFromAttribute(V, Size, DL, CtxI, DT, TLI))
     return isAligned(V, Align, DL);
 
   // For GEPs, determine if the indexing lands within the allocated object.
   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-    Type *Ty = GEP->getResultElementType();
     const Value *Base = GEP->getPointerOperand();
 
-    // Conservatively require that the base pointer be fully dereferenceable
-    // and aligned.
-    if (!Visited.insert(Base).second)
-      return false;
-    if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI,
-                                            Visited))
-      return false;
-
     APInt Offset(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
-    if (!GEP->accumulateConstantOffset(DL, Offset))
+    if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
+        !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue())
       return false;
 
-    // Check if the load is within the bounds of the underlying object
-    // and offset is aligned.
-    uint64_t LoadSize = DL.getTypeStoreSize(Ty);
-    Type *BaseType = GEP->getSourceElementType();
-    assert(isPowerOf2_32(Align) && "must be a power of 2!");
-    return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) &&
-           !(Offset & APInt(Offset.getBitWidth(), Align-1));
+    // If the base pointer is dereferenceable for Offset+Size bytes, then the
+    // GEP (== Base + Offset) is dereferenceable for Size bytes.  If the base
+    // pointer is aligned to Align bytes, and the Offset is divisible by Align
+    // then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
+    // aligned to Align bytes.
+
+    return Visited.insert(Base).second &&
+           isDereferenceableAndAlignedPointer(Base, Align, Offset + Size, DL,
+                                              CtxI, DT, TLI, Visited);
   }
 
   // For gc.relocate, look through relocations
   if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
     return isDereferenceableAndAlignedPointer(
-        RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
+        RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, TLI, Visited);
 
   if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
-    return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
-                                              CtxI, DT, TLI, Visited);
+    return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size,
+                                              DL, CtxI, DT, TLI, Visited);
 
   // If we don't know, assume the worst.
   return false;
@@ -173,19 +142,13 @@
   if (Align == 0)
     Align = DL.getABITypeAlignment(Ty);
 
-  if (Ty->isSized()) {
-    APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
-    const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
-    if (Offset.isNonNegative())
-      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) &&
-          isAligned(BV, Offset, Align, DL))
-        return true;
-  }
+  if (!Ty->isSized())
+    return false;
 
   SmallPtrSet<const Value *, 32> Visited;
-  return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI,
-                                              Visited);
+  return ::isDereferenceableAndAlignedPointer(
+      V, Align, APInt(DL.getTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
+      CtxI, DT, TLI, Visited);
 }
 
 bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
Index: llvm/trunk/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
===================================================================
--- llvm/trunk/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
+++ llvm/trunk/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+
+; Note: the !invariant.load is there just solely to let us call @use()
+; to add a fake use, and still have the aliasing work out.  The call
+; to @use(0) is just to provide a may-unwind exit out of the loop, so
+; that LICM cannot hoist out the load simply because it is guaranteed
+; to execute.
+
+declare void @use(i32)
+
+define void @f_0(i8* align 4 dereferenceable(1024) %ptr) {
+; CHECK-LABEL: @f_0(
+; CHECK: entry:
+; CHECK:  %val = load i32, i32* %ptr.i32
+; CHECK:  br label %loop
+; CHECK: loop:
+; CHECK:  call void @use(i32 0)
+; CHECK-NEXT:  call void @use(i32 %val)
+
+entry:
+  %ptr.gep = getelementptr i8, i8* %ptr, i32 32
+  %ptr.i32 = bitcast i8* %ptr.gep to i32*
+  br label %loop
+
+loop:
+  call void @use(i32 0)
+  %val = load i32, i32* %ptr.i32, !invariant.load !{}
+  call void @use(i32 %val)
+  br label %loop
+}
+
+define void @f_1(i8* align 4 dereferenceable_or_null(1024) %ptr) {
+; CHECK-LABEL: @f_1(
+entry:
+  %ptr.gep = getelementptr i8, i8* %ptr, i32 32
+  %ptr.i32 = bitcast i8* %ptr.gep to i32*
+  %ptr_is_null = icmp eq i8* %ptr, null
+  br i1 %ptr_is_null, label %leave, label %loop
+
+; CHECK: loop.preheader:
+; CHECK:  %val = load i32, i32* %ptr.i32
+; CHECK:  br label %loop
+; CHECK: loop:
+; CHECK:  call void @use(i32 0)
+; CHECK-NEXT:  call void @use(i32 %val)
+
+loop:
+  call void @use(i32 0)
+  %val = load i32, i32* %ptr.i32, !invariant.load !{}
+  call void @use(i32 %val)
+  br label %loop
+
+leave:
+  ret void
+}
+
+define void @f_2(i8* align 4 dereferenceable_or_null(1024) %ptr) {
+; CHECK-LABEL: @f_2(
+; CHECK-NOT: load
+; CHECK:  call void @use(i32 0)
+; CHECK-NEXT:  %val = load i32, i32* %ptr.i32, !invariant.load !0
+; CHECK-NEXT:  call void @use(i32 %val)
+
+entry:
+  ;; Can't hoist, since the alignment does not work out -- (<4 byte
+  ;; aligned> + 30) is not necessarily 4 byte aligned.
+
+  %ptr.gep = getelementptr i8, i8* %ptr, i32 30
+  %ptr.i32 = bitcast i8* %ptr.gep to i32*
+  %ptr_is_null = icmp eq i8* %ptr, null
+  br i1 %ptr_is_null, label %leave, label %loop
+
+loop:
+  call void @use(i32 0)
+  %val = load i32, i32* %ptr.i32, !invariant.load !{}
+  call void @use(i32 %val)
+  br label %loop
+
+leave:
+  ret void
+}
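
A note on the reasoning in the new GEP case above. What follows is a minimal
standalone C++ sketch of the arithmetic, not LLVM's API; the helper name and
its plain integer parameters are invented for illustration. It checks the same
two facts the patch relies on: if the base pointer is dereferenceable for
Offset+Size bytes, then Base+Offset is dereferenceable for Size bytes; and if
Base is aligned to Align and Offset is divisible by Align, then Base+Offset is
aligned to Align as well.

#include <cstdint>

// Hypothetical stand-in for the dereferenceable(N)/align(N) reasoning in
// isDereferenceableAndAlignedPointer's GEP case.  Returns true if
// Base + Offset is known dereferenceable for Size bytes and aligned to Align
// bytes, given that Base is dereferenceable for DerefBytes bytes and aligned
// to BaseAlign bytes.
static bool gepIsDereferenceableAndAligned(uint64_t DerefBytes,
                                           uint64_t BaseAlign, uint64_t Offset,
                                           uint64_t Size, uint64_t Align) {
  // Base == k_0 * BaseAlign and Offset == k_1 * Align; if BaseAlign is at
  // least Align and Offset is a multiple of Align, their sum is a multiple
  // of Align too.
  if (Offset % Align != 0 || BaseAlign < Align)
    return false;
  // [Offset, Offset + Size) lies within the first DerefBytes bytes of Base.
  return Offset + Size <= DerefBytes;
}

int main() {
  // Mirrors @f_0: align 4 dereferenceable(1024) base, gep +32, 4-byte load.
  bool CanHoist = gepIsDereferenceableAndAligned(1024, 4, 32, 4, 4);    // true
  // Mirrors @f_2: gep +30 is not 4-byte aligned, so hoisting is refused.
  bool CannotHoist = gepIsDereferenceableAndAligned(1024, 4, 30, 4, 4); // false
  (void)CanHoist;
  (void)CannotHoist;
}

The two calls correspond to @f_0 and @f_2 in the new test: the same
dereferenceable(1024) fact feeds both, and only the offset's divisibility by
the required alignment differs.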