Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -371,6 +371,16 @@
                                         const Instruction *CtxI = nullptr,
                                         const DominatorTree *DT = nullptr);
 
+/// Return true if, whenever CxtI executes, it is guaranteed that either I
+/// has already been executed or that I will still be reached and executed
+/// from that point.
+///
+/// The useful property this guarantees is that if I exhibits undefined
+/// behavior under some circumstances, then the whole program exhibits
+/// undefined behavior at CxtI.
+bool isGuaranteedToBeExecuted(const Instruction *I, const Instruction *CxtI,
+                              const DominatorTree *DT = nullptr);
+
 /// Return true if it is valid to use the assumptions provided by an
 /// assume intrinsic, I, at the point in the control-flow identified by the
 /// context instruction, CxtI.
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -397,18 +397,9 @@
   return false;
 }
 
-bool llvm::isValidAssumeForContext(const Instruction *Inv,
-                                   const Instruction *CxtI,
-                                   const DominatorTree *DT) {
-
-  // There are two restrictions on the use of an assume:
-  //  1. The assume must dominate the context (or the control flow must
-  //     reach the assume whenever it reaches the context).
-  //  2. The context must not be in the assume's set of ephemeral values
-  //     (otherwise we will use the assume to prove that the condition
-  //     feeding the assume is trivially true, thus causing the removal of
-  //     the assume).
-
+bool llvm::isGuaranteedToBeExecuted(const Instruction *Inv,
+                                    const Instruction *CxtI,
+                                    const DominatorTree *DT) {
   if (DT) {
     if (DT->dominates(Inv, CxtI))
       return true;
@@ -422,9 +413,9 @@
   if (Inv->getParent() != CxtI->getParent())
     return false;
 
-  // If we have a dom tree, then we now know that the assume doens't dominate
-  // the other instruction. If we don't have a dom tree then we can check if
-  // the assume is first in the BB.
+  // If we have a dom tree, then we now know that Inv doesn't dominate the
+  // other instruction. If we don't have a dom tree then we can check if
+  // Inv is first in the BB.
   if (!DT) {
     // Search forward from the assume until we reach the context (or the end
     // of the block); the common case is that the assume will come first.
@@ -434,21 +425,29 @@
       return true;
   }
 
-  // Don't let an assume affect itself - this would cause the problems
-  // `isEphemeralValueOf` is trying to prevent, and it would also make
-  // the loop below go out of bounds.
-  if (Inv == CxtI)
-    return false;
-
-  // The context comes first, but they're both in the same block. Make sure
+  // The context comes first, but they're both in the same block. Make sure
   // there is nothing in between that might interrupt the control flow.
-  for (BasicBlock::const_iterator I =
-         std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
+  for (BasicBlock::const_iterator I = BasicBlock::const_iterator(CxtI), IE(Inv);
        I != IE; ++I)
     if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
       return false;
 
-  return !isEphemeralValueOf(Inv, CxtI);
+  return true;
+}
+
+bool llvm::isValidAssumeForContext(const Instruction *I,
+                                   const Instruction *CxtI,
+                                   const DominatorTree *DT) {
+  // There are two restrictions on the use of an assume:
+  //  1. The assume must dominate the context (or the control flow must
+  //     reach the assume whenever it reaches the context).
+  //  2. The context must not be in the assume's set of ephemeral values
+  //     (otherwise we will use the assume to prove that the condition
+  //     feeding the assume is trivially true, thus causing the removal of
+  //     the assume).
+
+  return isGuaranteedToBeExecuted(I, CxtI, DT) &&
+         !isEphemeralValueOf(I, CxtI);
 }
 
 static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
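Illustrative sketch (not part of the patch): the guarantee isGuaranteedToBeExecuted checks can fail even inside a single basic block, because an intervening instruction may not transfer execution to its successor, for example a call that may never return:

  declare void @may_not_return()
  declare void @llvm.assume(i1)

  define i32 @example(i32 %x) {
  entry:
    %ctx = add i32 %x, 1          ; CxtI: where we would like to use the assume
    call void @may_not_return()   ; may diverge, so I below may never execute
    %c = icmp ne i32 %x, 0
    call void @llvm.assume(i1 %c) ; I: not guaranteed to execute when %ctx does
    ret i32 %ctx
  }

Here isGuaranteedToBeExecuted(I, CxtI, DT) should return false, so isValidAssumeForContext still rejects this context exactly as it did before the refactoring.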
Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -2405,15 +2405,10 @@
       // are different types, for example by mapping !nonnull metadata to
       // !range metadata by modeling the null pointer constant converted to the
       // integer type.
-      // FIXME: Add support for range metadata here. Currently the utilities
-      // for this don't propagate range metadata in trivial cases from one
-      // integer load to another, don't handle non-addrspace-0 null pointers
-      // correctly, and don't have any support for mapping ranges as the
-      // integer type becomes winder or narrower.
      if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
        copyNonnullMetadata(LI, N, *NewLI);
-
-      // Try to preserve nonnull metadata
+      if (MDNode *N = LI.getMetadata(LLVMContext::MD_range))
+        copyRangeMetadata(DL, LI, N, *NewLI);
       V = NewLI;
 
       // If this is an integer load past the end of the slice (which means the
@@ -3597,7 +3592,7 @@
                          PartPtrTy, BasePtr->getName() + "."),
           getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
           LI->getName());
-      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
 
       // Append this load onto the list of split loads so we can find it later
       // to rewrite the stores.
Index: lib/Transforms/Utils/Local.cpp
===================================================================
--- lib/Transforms/Utils/Local.cpp
+++ lib/Transforms/Utils/Local.cpp
@@ -1883,18 +1883,24 @@
 void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
                              MDNode *N, LoadInst &NewLI) {
   auto *NewTy = NewLI.getType();
+  auto *OldTy = OldLI.getType();
 
-  // Give up unless it is converted to a pointer where there is a single very
-  // valuable mapping we can do reliably.
-  // FIXME: It would be nice to propagate this in more ways, but the type
-  // conversions make it hard.
-  if (!NewTy->isPointerTy())
+  if (DL.getTypeStoreSizeInBits(NewTy) == DL.getTypeSizeInBits(OldTy) &&
+      NewTy->isIntegerTy()) {
+    // An integer with the same number of bits - give it the range
+    // metadata!
+    NewLI.setMetadata(LLVMContext::MD_range, N);
     return;
+  }
 
-  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
-  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-    MDNode *NN = MDNode::get(OldLI.getContext(), None);
-    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  if (NewTy->isPointerTy()) {
+    // Try to convert the !range metadata to !nonnull metadata on the
+    // new pointer.
+    unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+    if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+      MDNode *NN = MDNode::get(OldLI.getContext(), None);
+      NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+    }
   }
 }
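Illustrative sketch (not part of the patch; names and constants invented): the two cases the rewritten copyRangeMetadata handles. A load rewritten to an integer of the same width keeps its !range verbatim; a load rewritten to a pointer cannot carry !range, but is tagged !nonnull when the original range excludes zero. Every other type combination still drops the metadata, as before.

  define i64 @same_width(i64* %p) {
    ; The new load is an integer of equal width, so !range !0 is copied as-is.
    %v = load i64, i64* %p, !range !0
    ret i64 %v
  }

  define i8* @to_pointer(i8** %p) {
    ; The pre-rewrite load carried !range !1; since [1, 1024) excludes 0, the
    ; rewritten pointer load is tagged !nonnull instead.
    %v = load i8*, i8** %p, !nonnull !2
    ret i8* %v
  }

  !0 = !{i64 0, i64 2}     ; loaded value is 0 or 1
  !1 = !{i64 1, i64 1024}  ; loaded value is in [1, 1024), never 0
  !2 = !{}                 ; !nonnull takes an empty metadata node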
Index: lib/Transforms/Utils/PromoteMemoryToRegister.cpp
===================================================================
--- lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -217,6 +217,7 @@
   /// The alloca instructions being promoted.
   std::vector<AllocaInst *> Allocas;
   DominatorTree &DT;
+  const DataLayout &DL;
   DIBuilder DIB;
   /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
   AssumptionCache *AC;
@@ -262,9 +263,9 @@
   PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                  AssumptionCache *AC)
       : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
+        DL(DT.getRoot()->getParent()->getParent()->getDataLayout()),
         DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
-        AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
-                   nullptr, &DT, AC) {}
+        AC(AC), SQ(DL, nullptr, &DT, AC) {}
 
   void run();
@@ -305,6 +306,31 @@
   AC->registerAssumption(CI);
 }
 
+static void addAssumptionsFromMetadata(LoadInst *LI, Value *ReplVal,
+                                       DominatorTree &DT,
+                                       const DataLayout &DL,
+                                       AssumptionCache *AC) {
+  if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+      !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+    addAssumeNonNull(AC, LI);
+
+  if (auto *N = LI->getMetadata(LLVMContext::MD_range)) {
+    // Range metadata is harder to express as an assumption, so don't try to
+    // add one; instead, try to copy the metadata to the load that supplies
+    // the replacement value, provided the dying load is guaranteed to execute
+    // whenever that load does.
+    if (LoadInst *NewLI = dyn_cast<LoadInst>(ReplVal)) {
+      DEBUG(dbgs() << "trying to move !range metadata from" << *LI << " to"
+                   << *NewLI << "\n");
+      if (isGuaranteedToBeExecuted(LI, NewLI, &DT))
+        copyRangeMetadata(DL, *LI, N, *NewLI);
+    }
+  }
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -339,6 +365,7 @@
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                      LargeBlockInfo &LBI, DominatorTree &DT,
+                                     const DataLayout &DL,
                                      AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
@@ -394,9 +421,7 @@
     // If the load was marked as nonnull we don't want to lose
     // that information when we erase this Load. So we preserve
     // it with an assume.
-    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
-        !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
-      addAssumeNonNull(AC, LI);
+    addAssumptionsFromMetadata(LI, ReplVal, DT, DL, AC);
 
     LI->replaceAllUsesWith(ReplVal);
     LI->eraseFromParent();
@@ -443,6 +468,7 @@
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI, DominatorTree &DT,
+                                     const DataLayout &DL,
                                      AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
@@ -489,9 +515,7 @@
       // Note, if the load was marked as nonnull we don't want to lose that
       // information when we erase it. So we preserve it with an assume.
       Value *ReplVal = std::prev(I)->second->getOperand(0);
-      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
-          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
-        addAssumeNonNull(AC, LI);
+      addAssumptionsFromMetadata(LI, ReplVal, DT, DL, AC);
 
       LI->replaceAllUsesWith(ReplVal);
@@ -560,7 +584,7 @@
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
     if (Info.DefiningBlocks.size() == 1) {
-      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) {
+      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, DL, AC)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
@@ -571,7 +595,7 @@
    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock &&
-        promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) {
+        promoteSingleBlockAlloca(AI, Info, LBI, DT, DL, AC)) {
      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }
@@ -930,9 +954,7 @@
        // If the load was marked as nonnull we don't want to lose
        // that information when we erase this Load. So we preserve
        // it with an assume.
-        if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
-            !llvm::isKnownNonNullAt(V, LI, &DT))
-          addAssumeNonNull(AC, LI);
+        addAssumptionsFromMetadata(LI, V, DT, DL, AC);
 
        // Anything using the load now uses the current value.
        LI->replaceAllUsesWith(V);
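Illustrative sketch (not part of the patch) of the situation addAssumptionsFromMetadata targets during promotion: the load that dies with the alloca carries !range, its replacement value is itself a load, and the dying load is guaranteed to execute whenever the replacement load does, so the metadata can be moved instead of dropped.

  define i8 @promote(i8* %p) {
  entry:
    %a = alloca i8
    %v = load i8, i8* %p              ; ReplVal: the value stored into %a
    store i8 %v, i8* %a
    %r = load i8, i8* %a, !range !0   ; dies when %a is promoted
    ret i8 %r
  }

  !0 = !{i8 0, i8 2}   ; the loaded value is 0 or 1

After mem2reg replaces %r with %v, isGuaranteedToBeExecuted(%r, %v, DT) holds (same block, only a store in between), so copyRangeMetadata should turn %v into "load i8, i8* %p, !range !0".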
Index: test/Transforms/SROA/preserve-nonnull.ll
===================================================================
--- test/Transforms/SROA/preserve-nonnull.ll
+++ test/Transforms/SROA/preserve-nonnull.ll
@@ -3,6 +3,8 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
+%pair = type { i64, [0 x i64], [1 x i64] }
+
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
 
 ; Check that we do basic propagation of nonnull when rewriting.
@@ -42,6 +44,23 @@
   ret float* %ret
 }
 
+; Make sure we propagate the !range metadata when we expand loads.
+define i64 @propagate_range(%pair* dereferenceable(16)) {
+; CHECK-LABEL: define i64 @propagate_range(
+; CHECK-NEXT:  start:
+; CHECK-NEXT:    %[[SROA_IDX:.*]] = getelementptr inbounds %pair
+; CHECK-NEXT:    %[[RESULT:.*]] = load i64, i64* %[[SROA_IDX]], align 8, !range !1
+; CHECK:         ret i64 %[[RESULT]]
+start:
+  %a = alloca %pair
+  %1 = bitcast %pair* %0 to i8*
+  %2 = bitcast %pair* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 16, i32 8, i1 false)
+  %3 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0
+  %4 = load i64, i64* %3, !range !1
+  ret i64 %4
+}
+
 ; Make sure we properly handle the !nonnull attribute when we convert
 ; a pointer load to an integer load.
 ; FIXME: While this doesn't do anythnig actively harmful today, it really
@@ -90,3 +109,4 @@
 }
 
 !0 = !{}
+!1 = !{i64 0, i64 2}
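For reference, !{i64 0, i64 2} denotes the half-open interval [0, 2), so the annotated load is known to produce 0 or 1. The CHECK lines above only match fragments; the post-SROA IR they expect looks roughly like the sketch below (assumed output with invented value names, not text generated by opt):

  %pair = type { i64, [0 x i64], [1 x i64] }

  define i64 @propagate_range(%pair* dereferenceable(16)) {
  start:
    ; The alloca and memcpy are gone; the first field is loaded directly from
    ; the argument, and the !range metadata survives on the rewritten load.
    %idx = getelementptr inbounds %pair, %pair* %0, i64 0, i32 0
    %val = load i64, i64* %idx, align 8, !range !0
    ret i64 %val
  }

  !0 = !{i64 0, i64 2}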