Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -2405,15 +2405,10 @@ // are different types, for example by mapping !nonnull metadata to // !range metadata by modeling the null pointer constant converted to the // integer type. - // FIXME: Add support for range metadata here. Currently the utilities - // for this don't propagate range metadata in trivial cases from one - // integer load to another, don't handle non-addrspace-0 null pointers - // correctly, and don't have any support for mapping ranges as the - // integer type becomes winder or narrower. if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull)) copyNonnullMetadata(LI, N, *NewLI); - - // Try to preserve nonnull metadata + if (MDNode *N = LI.getMetadata(LLVMContext::MD_range)) + copyRangeMetadata(DL, LI, N, *NewLI); V = NewLI; // If this is an integer load past the end of the slice (which means the @@ -3597,7 +3592,7 @@ PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); - PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -1883,18 +1883,24 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N, LoadInst &NewLI) { auto *NewTy = NewLI.getType(); + auto *OldTy = OldLI.getType(); - // Give up unless it is converted to a pointer where there is a single very - // valuable mapping we can do reliably. - // FIXME: It would be nice to propagate this in more ways, but the type - // conversions make it hard. 
- if (!NewTy->isPointerTy()) + // !range operands are typed with the loaded integer type, so the node can + // only be reused as-is when the load type is unchanged; a size-based test + // that mixes store size and bit size would also accept e.g. i1 vs. i8. + if (NewTy == OldTy) { + NewLI.setMetadata(LLVMContext::MD_range, N); return; + } - unsigned BitWidth = DL.getTypeSizeInBits(NewTy); - if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { - MDNode *NN = MDNode::get(OldLI.getContext(), None); - NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + if (NewTy->isPointerTy()) { + // Try to convert the !range metadata to !nonnull metadata on the + // new pointer. + unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { + MDNode *NN = MDNode::get(OldLI.getContext(), None); + NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + } } } Index: lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -40,6 +41,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -163,6 +165,16 @@ ValVector Values; }; +/// \brief Semi-open interval of instructions that are guaranteed to +/// all execute if the first one does. +class GuaranteedExecutionRange { +public: + unsigned start; + unsigned end; + + GuaranteedExecutionRange(unsigned s, unsigned e): start(s), end(e) {} +}; + /// \brief This assigns and keeps a per-bb relative ordering of load/store /// instructions in the block that directly load or store an alloca. /// @@ -176,14 +188,109 @@ /// the block. 
DenseMap InstNumbers; + /// \brief For each basic block we track, keep track of the intervals + /// of instruction numbers of instructions that transfer control + /// to their successors, for propagating metadata. + DenseMap>> + GuaranteedExecutionIntervals; + public: - /// This code only looks at accesses to allocas. + /// This code looks for stores to allocas, and for loads both for + /// allocas and for transferring metadata. static bool isInterestingInstruction(const Instruction *I) { - return (isa(I) && isa(I->getOperand(0))) || + return isa(I) || (isa(I) && isa(I->getOperand(1))); } + /// Compute the GuaranteedExecutionIntervals for a given BB. + /// + /// This is valid and remains valid as long as each interesting + /// instruction (see isInterestingInstruction) that + /// A) existed when this LBI was cleared + /// B) has not been deleted (deleting interesting instructions is fine) + /// are run in the same program executions and in the same order + /// as when this LBI was cleared. + /// + /// Because `PromoteMemoryToRegister` does not move memory loads at + /// all, this assumption is satisfied in this pass. 
+ SmallVector computeGEI(const BasicBlock *BB) { + SmallVector GuaranteedExecutionIntervals; + + unsigned InstNo = 0; + bool InRange = false; + unsigned FirstInstInRange = 0; + for (const Instruction &BBI : *BB) { + if (isGuaranteedToTransferExecutionToSuccessor(&BBI)) { + if (!InRange && isInterestingInstruction(&BBI)) { + InRange = true; + FirstInstInRange = InstNo; + } + } else { + if (InRange) { + assert(FirstInstInRange < InstNo && "Can't push an empty range here."); + GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo); + } + InRange = false; + } + + if (isInterestingInstruction(&BBI)) { + auto It = InstNumbers.find(&BBI); + assert(It != InstNumbers.end() && + InstNo <= It->second && + "missing number for interesting instruction"); + InstNo = It->second + 1; + } + } + + if (InRange) { + assert(FirstInstInRange < InstNo && "Can't push an empty range here."); + GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo); + } + + return GuaranteedExecutionIntervals; + } + + /// Return true if, when CxtI executes, it is guaranteed that either + /// I had executed already or that I is guaranteed to be later executed. + /// + /// The useful property this guarantees is that if I exhibits undefined + /// behavior under some circumstances, then the whole program will exhibit + /// undefined behavior at CxtI. + bool isGuaranteedToBeExecuted(const Instruction *CxtI, const Instruction *I) { + const BasicBlock *BB = CxtI->getParent(); + + if (BB != I->getParent()) { + // instructions in different basic blocks, so control flow + // can diverge between them (we could track this with + // postdoms, but we don't bother). + return false; + } + + unsigned index1 = getInstructionIndex(CxtI); + unsigned index2 = getInstructionIndex(I); + + auto& BBGEI = GuaranteedExecutionIntervals[BB]; + if (!BBGEI.hasValue()) { + BBGEI.emplace(computeGEI(BB)); + } + + // We want to check whether I and CxtI are in the same range. 
To do that, + // we notice that CxtI can only be in the first range R where + // index1 < R.end. If we find that range using binary search, + // we can check whether I and CxtI are both in it. + GuaranteedExecutionRange Bound(index1, index1); + auto R = std::upper_bound( + BBGEI->begin(), BBGEI->end(), Bound, + [](GuaranteedExecutionRange I_, GuaranteedExecutionRange R) { + return I_.end < R.end; + }); + + return R != BBGEI->end() && + R->start <= index1 && index1 < R->end && + R->start <= index2 && index2 < R->end; + } + /// Get or calculate the index of the specified instruction. unsigned getInstructionIndex(const Instruction *I) { assert(isInterestingInstruction(I) && @@ -199,9 +306,11 @@ // avoid gratuitus rescans. const BasicBlock *BB = I->getParent(); unsigned InstNo = 0; + GuaranteedExecutionIntervals.erase(BB); for (const Instruction &BBI : *BB) if (isInterestingInstruction(&BBI)) InstNumbers[&BBI] = InstNo++; + It = InstNumbers.find(I); assert(It != InstNumbers.end() && "Didn't insert instruction?"); @@ -210,13 +319,17 @@ void deleteValue(const Instruction *I) { InstNumbers.erase(I); } - void clear() { InstNumbers.clear(); } + void clear() { + InstNumbers.clear(); + GuaranteedExecutionIntervals.clear(); + } }; struct PromoteMem2Reg { /// The alloca instructions being promoted. std::vector Allocas; DominatorTree &DT; + const DataLayout &DL; DIBuilder DIB; /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. 
AssumptionCache *AC; @@ -262,9 +375,9 @@ PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DL(DT.getRoot()->getParent()->getParent()->getDataLayout()), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), - nullptr, &DT, AC) {} + AC(AC), SQ(DL, nullptr, &DT, AC) {} void run(); @@ -287,6 +400,7 @@ SmallPtrSetImpl &LiveInBlocks); void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, + LargeBlockInfo &LBI, std::vector &Worklist); bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; @@ -305,6 +419,31 @@ AC->registerAssumption(CI); } +static void addAssumptionsFromMetadata(LoadInst *LI, + Value *ReplVal, + DominatorTree &DT, + const DataLayout &DL, + LargeBlockInfo &LBI, + AssumptionCache *AC) { + if (LI->getMetadata(LLVMContext::MD_nonnull) && + !isKnownNonNullAt(ReplVal, LI, &DT)) { + addAssumeNonNull(AC, LI); + } + + if (auto *N = LI->getMetadata(LLVMContext::MD_range)) { + // Range metadata is harder to use as an assumption, + // so don't try to add one, but *do* try to copy + // the metadata to a load in the same BB. + if (LoadInst *NewLI = dyn_cast(ReplVal)) { + DEBUG(dbgs() << "trying to move !range metadata from" << + *LI << " to" << *NewLI << "\n"); + if (LBI.isGuaranteedToBeExecuted(LI, NewLI)) { + copyRangeMetadata(DL, *LI, N, *NewLI); + } + } + } +} + static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. @@ -339,6 +478,7 @@ /// promotion algorithm in that case. 
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, + const DataLayout &DL, AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); @@ -394,9 +534,7 @@ // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve // it with an assume. - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) - addAssumeNonNull(AC, LI); + addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC); LI->replaceAllUsesWith(ReplVal); LI->eraseFromParent(); @@ -443,6 +581,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, + const DataLayout &DL, AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not @@ -489,9 +628,7 @@ // Note, if the load was marked as nonnull we don't want to lose that // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) - addAssumeNonNull(AC, LI); + addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC); LI->replaceAllUsesWith(ReplVal); } @@ -560,7 +697,7 @@ // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, DL, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -571,7 +708,7 @@ // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. 
if (Info.OnlyUsedInOneBlock && - promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) { + promoteSingleBlockAlloca(AI, Info, LBI, DT, DL, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -628,7 +765,6 @@ if (Allocas.empty()) return; // All of the allocas must have been trivial! - LBI.clear(); // Set the incoming values for the basic block to be null values for all of @@ -648,9 +784,10 @@ RenamePassData RPD = std::move(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. - RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); + RenamePass(RPD.BB, RPD.Pred, RPD.Values, LBI, RenamePassWorkList); } while (!RenamePassWorkList.empty()); + LBI.clear(); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); @@ -864,6 +1001,7 @@ /// predecessor block Pred. void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, + LargeBlockInfo &LBI, std::vector &Worklist) { NextIteration: // If we are inserting any phi nodes into this BB, they will already be in the @@ -930,13 +1068,12 @@ // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve // it with an assume. - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(V, LI, &DT)) - addAssumeNonNull(AC, LI); + addAssumptionsFromMetadata(LI, V, DT, DL, LBI, AC); // Anything using the load now uses the current value. 
LI->replaceAllUsesWith(V); + LBI.deleteValue(LI); BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast(I)) { // Delete this instruction and mark the name as the current holder of the // value @@ -954,6 +1091,7 @@ if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + LBI.deleteValue(SI); BB->getInstList().erase(SI); } } Index: test/Transforms/SROA/preserve-nonnull.ll =================================================================== --- test/Transforms/SROA/preserve-nonnull.ll +++ test/Transforms/SROA/preserve-nonnull.ll @@ -3,6 +3,8 @@ ; Make sure that SROA doesn't lose nonnull metadata ; on loads from allocas that get optimized out. +%pair = type { i64, [0 x i64], [1 x i64] } + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) ; Check that we do basic propagation of nonnull when rewriting. @@ -42,6 +44,23 @@ ret float* %ret } +; Make sure we propagate the !range metadata when we expand loads. +define i64 @propagate_range(%pair* dereferenceable(16)) { +; CHECK-LABEL: define i64 @propagate_range( +; CHECK-NEXT: start: +; CHECK-NEXT: %[[SROA_IDX:.*]] = getelementptr inbounds %pair +; CHECK-NEXT: %[[RESULT:.*]] = load i64, i64* %[[SROA_IDX]], align 8, !range !1 +; CHECK: ret i64 %[[RESULT]] +start: + %a = alloca %pair + %1 = bitcast %pair* %0 to i8* + %2 = bitcast %pair* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 16, i32 8, i1 false) + %3 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0 + %4 = load i64, i64* %3, !range !1 + ret i64 %4 +} + ; Make sure we properly handle the !nonnull attribute when we convert ; a pointer load to an integer load. ; FIXME: While this doesn't do anythnig actively harmful today, it really @@ -90,3 +109,4 @@ } !0 = !{} +!1 = !{i64 0, i64 2}