Index: include/llvm/Analysis/ValueTracking.h =================================================================== --- include/llvm/Analysis/ValueTracking.h +++ include/llvm/Analysis/ValueTracking.h @@ -227,6 +227,16 @@ const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); + + /// isDereferenceableAndAlignedPointer - Return true if this is always a + /// dereferenceable pointer whose alignment is at least the requested one. If + /// a context instruction is specified, perform context-sensitive analysis and + /// return true if the pointer is dereferenceable at that instruction. + bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// isSafeToSpeculativelyExecute - Return true if the instruction does not /// have any effects besides calculating the result and does not have Index: lib/Analysis/Loads.cpp =================================================================== --- lib/Analysis/Loads.cpp +++ lib/Analysis/Loads.cpp @@ -65,6 +65,11 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align) { const DataLayout &DL = ScanFrom->getModule()->getDataLayout(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); + int64_t ByteOffset = 0; Value *Base = V; Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); @@ -102,7 +107,7 @@ if (Align <= BaseAlign) { // Check if the load is within the bounds of the underlying object. 
if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && - (Align == 0 || (ByteOffset % Align) == 0)) + ((ByteOffset % Align) == 0)) return true; } } @@ -128,20 +133,28 @@ return false; Value *AccessedPtr; - if (LoadInst *LI = dyn_cast(BBI)) + unsigned AccessedAlign; + if (LoadInst *LI = dyn_cast(BBI)) { AccessedPtr = LI->getPointerOperand(); - else if (StoreInst *SI = dyn_cast(BBI)) + AccessedAlign = LI->getAlignment(); + } else if (StoreInst *SI = dyn_cast(BBI)) { AccessedPtr = SI->getPointerOperand(); - else + AccessedAlign = SI->getAlignment(); + } else + continue; + + auto *AccessedTy = AccessedPtr->getType()->getPointerElementType(); + if (AccessedAlign == 0) + AccessedAlign = DL.getABITypeAlignment(AccessedTy); + if (AccessedAlign < Align) continue; // Handle trivial cases. if (AccessedPtr == V) return true; - auto *AccessedTy = cast(AccessedPtr->getType()); if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && - LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType())) + LoadSize <= DL.getTypeStoreSize(AccessedTy)) return true; } return false; Index: lib/Analysis/MemDerefPrinter.cpp =================================================================== --- lib/Analysis/MemDerefPrinter.cpp +++ lib/Analysis/MemDerefPrinter.cpp @@ -22,7 +22,8 @@ namespace { struct MemDerefPrinter : public FunctionPass { - SmallVector Vec; + SmallVector Deref; + SmallVector DerefAndAligned; static char ID; // Pass identifcation, replacement for typeid MemDerefPrinter() : FunctionPass(ID) { @@ -34,7 +35,8 @@ bool runOnFunction(Function &F) override; void print(raw_ostream &OS, const Module * = nullptr) const override; void releaseMemory() override { - Vec.clear(); + Deref.clear(); + DerefAndAligned.clear(); } }; } @@ -55,7 +57,9 @@ if (LoadInst *LI = dyn_cast(&I)) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, DL)) - Vec.push_back(PO); + Deref.push_back(PO); + if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL)) + 
DerefAndAligned.push_back(PO); } } return false; @@ -63,7 +67,12 @@ void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const { OS << "The following are dereferenceable:\n"; - for (auto &V: Vec) { + for (auto &V: Deref) { + V->print(OS); + OS << "\n\n"; + } + OS << "The following are dereferenceable and aligned:\n"; + for (auto &V: DerefAndAligned) { V->print(OS); OS << "\n\n"; } Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -2921,20 +2921,45 @@ return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI); } -/// Return true if Value is always a dereferenceable pointer. -/// +static bool isAligned(const Value *Base, APInt Offset, unsigned Align, + const DataLayout &DL) { + assert(isPowerOf2_32(Align) && "must be a power of 2!"); + + APInt BaseAlign(Offset.getBitWidth(), 0); + if (const AllocaInst *AI = dyn_cast(Base)) { + BaseAlign = AI->getAlignment(); + } else if (const GlobalVariable *GV = dyn_cast(Base)) { + BaseAlign = GV->getAlignment(); + } else if (const Argument *A = dyn_cast(Base)) { + BaseAlign = A->getParamAlignment(); + } + + if (!BaseAlign.getBoolValue()) { + Type *Ty = Base->getType()->getPointerElementType(); + BaseAlign = DL.getABITypeAlignment(Ty); + } + + APInt Alignment(Offset.getBitWidth(), Align); + return BaseAlign.uge(Alignment) && Offset.urem(Alignment) == 0; +} + +static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { + APInt Offset(DL.getTypeStoreSizeInBits(Base->getType()), 0); + return isAligned(Base, Offset, Align, DL); +} + /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. 
-static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - SmallPtrSetImpl &Visited) { +static bool isDereferenceableAndAlignedPointer( + const Value *V, unsigned Align, const DataLayout &DL, + const Instruction *CtxI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, SmallPtrSetImpl &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. // These are obviously ok. - if (isa(V)) return true; + if (isa(V)) + return isAligned(V, Align, DL); // It's not always safe to follow a bitcast, for example: // bitcast i8* (alloca i8) to i32* @@ -2949,21 +2974,22 @@ if (STy->isSized() && DTy->isSized() && (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) - return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); } // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + if (!GV->hasExternalWeakLinkage()) + return isAligned(V, Align, DL); // byval arguments are okay. if (const Argument *A = dyn_cast(V)) if (A->hasByValAttr()) - return true; - + return isAligned(V, Align, DL); + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) - return true; + return isAligned(V, Align, DL); // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast(V)) { @@ -2974,61 +3000,75 @@ // Conservatively require that the base pointer be fully dereferenceable. 
if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(Base, DL, CtxI, - DT, TLI, Visited)) + if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, + Visited)) return false; - + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); if (!GEP->accumulateConstantOffset(DL, Offset)) return false; - + // Check if the load is within the bounds of the underlying object. uint64_t LoadSize = DL.getTypeStoreSize(Ty); - Type* BaseType = Base->getType()->getPointerElementType(); + Type *BaseType = Base->getType()->getPointerElementType(); if ((Offset + LoadSize).ugt(DL.getTypeAllocSize(BaseType))) return false; - - return true; + + return isAligned(Base, Offset, Align, DL); } // For gc.relocate, look through relocations if (const IntrinsicInst *I = dyn_cast(V)) if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { GCRelocateOperands RelocateInst(I); - return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer( + RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); } if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. 
Type *VTy = V->getType(); Type *Ty = VTy->getPointerElementType(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(Ty); + if (Ty->isSized()) { APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - + if (Offset.isNonNegative()) - if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, - CtxI, DT, TLI)) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && + isAligned(BV, Offset, Align, DL)) return true; } SmallPtrSet Visited; - return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited); + return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, + Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); } bool llvm::isSafeToSpeculativelyExecute(const Value *V, @@ -3081,7 +3121,8 @@ LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { Index: test/Analysis/ValueTracking/memory-dereferenceable.ll =================================================================== --- test/Analysis/ValueTracking/memory-dereferenceable.ll +++ test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -10,7 +10,13 @@ @globalstr = global [6 x i8] c"hello\00" @globali32ptr = external global i32* -define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" { +@globalptr.align1 = external global i8, align 1 +@globalptr.align16 = external global 
i8, align 16 + +define void @test(i32 addrspace(1)* dereferenceable(8) %dparam, + i8 addrspace(1)* dereferenceable(32) align 1 %dparam.align1, + i8 addrspace(1)* dereferenceable(32) align 16 %dparam.align16) + gc "statepoint-example" { ; CHECK: The following are dereferenceable: ; CHECK: %globalptr ; CHECK: %alloca @@ -22,6 +28,38 @@ ; CHECK-NOT: %d2_load ; CHECK-NOT: %d_or_null_load ; CHECK: %d_or_null_non_null_load +; CHECK: @globalptr.align1 +; CHECK: @globalptr.align16 +; CHECK: %dparam.align1 +; CHECK: %dparam.align16 +; CHECK: %alloca.align1 +; CHECK: %alloca.align16 +; CHECK: %gep.align1.offset1 +; CHECK: %gep.align16.offset1 +; CHECK: %gep.align1.offset16 +; CHECK: %gep.align16.offset16 + +; CHECK: The following are dereferenceable and aligned: +; CHECK: %globalptr +; CHECK: %alloca +; CHECK: %dparam +; CHECK: %relocate +; CHECK-NOT: %nparam +; CHECK-NOT: %nd_load +; CHECK: %d4_load +; CHECK-NOT: %d2_load +; CHECK-NOT: %d_or_null_load +; CHECK: %d_or_null_non_null_load +; CHECK-NOT: @globalptr.align1 +; CHECK: @globalptr.align16 +; CHECK-NOT: %dparam.align1 +; CHECK: %dparam.align16 +; CHECK-NOT: %alloca.align1 +; CHECK: %alloca.align16 +; CHECK-NOT: %gep.align1.offset1 +; CHECK-NOT: %gep.align16.offset1 +; CHECK-NOT: %gep.align1.offset16 +; CHECK: %gep.align16.offset16 entry: %globalptr = getelementptr inbounds [6 x i8], [6 x i8]* @globalstr, i32 0, i32 0 %load1 = load i8, i8* %globalptr @@ -54,6 +92,30 @@ %d_or_null_non_null_load = load i32*, i32** @globali32ptr, !nonnull !2, !dereferenceable_or_null !0 %load10 = load i32, i32* %d_or_null_non_null_load + ; Loads from aligned globals + %load11 = load i8, i8* @globalptr.align1, align 16 + %load12 = load i8, i8* @globalptr.align16, align 16 + + ; Loads from aligned arguments + %load13 = load i8, i8 addrspace(1)* %dparam.align1, align 16 + %load14 = load i8, i8 addrspace(1)* %dparam.align16, align 16 + + ; Loads from aligned allocas + %alloca.align1 = alloca i1, align 1 + %alloca.align16 = alloca i1, align 16 
+ %load15 = load i1, i1* %alloca.align1, align 16 + %load16 = load i1, i1* %alloca.align16, align 16 + + ; Loads from GEPs + %gep.align1.offset1 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align1, i32 1 + %gep.align16.offset1 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align16, i32 1 + %gep.align1.offset16 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align1, i32 16 + %gep.align16.offset16 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align16, i32 16 + %load17 = load i8, i8 addrspace(1)* %gep.align1.offset1, align 16 + %load18 = load i8, i8 addrspace(1)* %gep.align16.offset1, align 16 + %load19 = load i8, i8 addrspace(1)* %gep.align1.offset16, align 16 + %load20 = load i8, i8 addrspace(1)* %gep.align16.offset16, align 16 + ret void } Index: test/Transforms/InstCombine/select.ll =================================================================== --- test/Transforms/InstCombine/select.ll +++ test/Transforms/InstCombine/select.ll @@ -1296,6 +1296,23 @@ ret i32 %v } +define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) { +; The same as @test78 but we can't speculate the load because it can trap +; if under-aligned. +; CHECK-LABEL: @test78_neg( +; CHECK: %p = select i1 %flag, i32* %x, i32* %y +; CHECK-NEXT: %v = load i32, i32* %p, align 16 +; CHECK-NEXT: ret i32 %v +entry: + store i32 0, i32* %x + store i32 0, i32* %y + ; Block forwarding by storing to %z which could alias either %x or %y. + store i32 42, i32* %z + %p = select i1 %flag, i32* %x, i32* %y + %v = load i32, i32* %p, align 16 + ret i32 %v +} + define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) { ; Test that we can speculate the loads around the select even when we can't ; fold the load completely away.