Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -2405,15 +2405,10 @@ // are different types, for example by mapping !nonnull metadata to // !range metadata by modeling the null pointer constant converted to the // integer type. - // FIXME: Add support for range metadata here. Currently the utilities - // for this don't propagate range metadata in trivial cases from one - // integer load to another, don't handle non-addrspace-0 null pointers - // correctly, and don't have any support for mapping ranges as the - // integer type becomes winder or narrower. if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull)) copyNonnullMetadata(LI, N, *NewLI); - - // Try to preserve nonnull metadata + if (MDNode *N = LI.getMetadata(LLVMContext::MD_range)) + copyRangeMetadata(DL, LI, N, *NewLI); V = NewLI; // If this is an integer load past the end of the slice (which means the @@ -3597,7 +3592,7 @@ PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); - PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -1883,18 +1883,23 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N, LoadInst &NewLI) { auto *NewTy = NewLI.getType(); + auto *OldTy = OldLI.getType(); - // Give up unless it is converted to a pointer where there is a single very - // valuable mapping we can do reliably. - // FIXME: It would be nice to propagate this in more ways, but the type - // conversions make it hard. - if (!NewTy->isPointerTy()) - return; + if (DL.getTypeStoreSizeInBits(NewTy) == DL.getTypeSizeInBits(OldTy) && + NewTy->isIntegerTy()) { + // An integer with the same number of bits - give it the range + // metadata!. + NewLI.setMetadata(LLVMContext::MD_range, N); + } - unsigned BitWidth = DL.getTypeSizeInBits(NewTy); - if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { - MDNode *NN = MDNode::get(OldLI.getContext(), None); - NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + if (NewTy->isPointerTy()) { + // Try to convert the !range metadata to !nonnull metadata on the + // new pointer. + unsigned BitWidth = DL.getTypeSizeInBits(NewTy); + if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { + MDNode *NN = MDNode::get(OldLI.getContext(), None); + NewLI.setMetadata(LLVMContext::MD_nonnull, NN); + } } } Index: lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -217,6 +217,7 @@ /// The alloca instructions being promoted. std::vector Allocas; DominatorTree &DT; + const DataLayout &DL; DIBuilder DIB; /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; @@ -262,9 +263,9 @@ PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DL(DT.getRoot()->getParent()->getParent()->getDataLayout()), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), - nullptr, &DT, AC) {} + AC(AC), SQ(DL, nullptr, &DT, AC) {} void run(); @@ -305,6 +306,33 @@ AC->registerAssumption(CI); } +static void addAssumptionsFromMetadata(LoadInst *LI, + Value *ReplVal, + DominatorTree &DT, + const DataLayout &DL, + AssumptionCache *AC) +{ + if (!AC) return; // FIXME: cargo cult. + if (LI->getMetadata(LLVMContext::MD_nonnull) && + !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) + { + addAssumeNonNull(AC, LI); + } + + if (auto *N = LI->getMetadata(LLVMContext::MD_range)) { + // Range metadata is harder to use as an assumption, + // so don't try to add one, but *do* try to copy + // the metadata to a load in the same BB. + if (LoadInst *NewLI = dyn_cast(ReplVal)) { + DEBUG(dbgs() << "trying to move !range metadata from" << + *LI << " to" << *NewLI << "\n"); + if (isValidAssumeForContext(LI, NewLI, &DT)) { + copyRangeMetadata(DL, *LI, N, *NewLI); + } + } + } +} + static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. @@ -339,6 +367,7 @@ /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, + const DataLayout &DL, AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); @@ -394,9 +423,7 @@ // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve // it with an assume. - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) - addAssumeNonNull(AC, LI); + addAssumptionsFromMetadata(LI, ReplVal, DT, DL, AC); LI->replaceAllUsesWith(ReplVal); LI->eraseFromParent(); @@ -443,6 +470,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, DominatorTree &DT, + const DataLayout &DL, AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not @@ -489,9 +517,7 @@ // Note, if the load was marked as nonnull we don't want to lose that // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) - addAssumeNonNull(AC, LI); + addAssumptionsFromMetadata(LI, ReplVal, DT, DL, AC); LI->replaceAllUsesWith(ReplVal); } @@ -560,7 +586,7 @@ // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, DL, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -571,7 +597,7 @@ // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && - promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) { + promoteSingleBlockAlloca(AI, Info, LBI, DT, DL, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -930,9 +956,7 @@ // If the load was marked as nonnull we don't want to lose // that information when we erase this Load. So we preserve // it with an assume. - if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(V, LI, &DT)) - addAssumeNonNull(AC, LI); + addAssumptionsFromMetadata(LI, V, DT, DL, AC); // Anything using the load now uses the current value. LI->replaceAllUsesWith(V); Index: test/Transforms/SROA/preserve-nonnull.ll =================================================================== --- test/Transforms/SROA/preserve-nonnull.ll +++ test/Transforms/SROA/preserve-nonnull.ll @@ -3,6 +3,8 @@ ; Make sure that SROA doesn't lose nonnull metadata ; on loads from allocas that get optimized out. +%pair = type { i64, [0 x i64], [1 x i64] } + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) ; Check that we do basic propagation of nonnull when rewriting. @@ -42,6 +44,23 @@ ret float* %ret } +; Make sure we propagate the !range attribute when we expand loads. +define i64 @propagate_range(%pair* dereferenceable(16)) { +; CHECK-LABEL: define i64 @propagate_range( +; CHECK-NEXT: start: +; CHECK-NEXT: %[[SROA_IDX:.*]] = getelementptr inbounds %pair +; CHECK-NEXT: %[[RESULT:.*]] = load i64, i64* %[[SROA_IDX]], align 8, !range !1 +; CHECK: ret i64 %[[RESULT]] +start: + %a = alloca %pair + %1 = bitcast %pair* %0 to i8* + %2 = bitcast %pair* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 16, i32 8, i1 false) + %3 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0 + %4 = load i64, i64* %3, !range !1 + ret i64 %4 +} + ; Make sure we properly handle the !nonnull attribute when we convert ; a pointer load to an integer load. ; FIXME: While this doesn't do anythnig actively harmful today, it really @@ -90,3 +109,4 @@ } !0 = !{} +!1 = !{i64 0, i64 2}