Skip to content

Commit b5d59e7

Browse files
committedNov 27, 2017
[SROA] Propagate !range metadata when moving loads.
This tries to propagate !range metadata to a pre-existing load when a load is optimized out. This is done instead of adding an assume because converting loads to and from assumes creates a lot of IR. Patch by Ariel Ben-Yehuda. Differential Revision: https://reviews.llvm.org/D37216 llvm-svn: 319096
1 parent 2072552 commit b5d59e7

File tree

4 files changed

+188
-32
lines changed

4 files changed

+188
-32
lines changed
 

‎llvm/lib/Transforms/Scalar/SROA.cpp

+3-8
Original file line numberDiff line numberDiff line change
@@ -2455,15 +2455,10 @@ class llvm::sroa::AllocaSliceRewriter
24552455
// are different types, for example by mapping !nonnull metadata to
24562456
// !range metadata by modeling the null pointer constant converted to the
24572457
// integer type.
2458-
// FIXME: Add support for range metadata here. Currently the utilities
2459-
// for this don't propagate range metadata in trivial cases from one
2460-
// integer load to another, don't handle non-addrspace-0 null pointers
2461-
// correctly, and don't have any support for mapping ranges as the
2462-
// integer type becomes wider or narrower.
24632458
if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
24642459
copyNonnullMetadata(LI, N, *NewLI);
2465-
2466-
// Try to preserve nonnull metadata
2460+
if (MDNode *N = LI.getMetadata(LLVMContext::MD_range))
2461+
copyRangeMetadata(DL, LI, N, *NewLI);
24672462
V = NewLI;
24682463

24692464
// If this is an integer load past the end of the slice (which means the
@@ -3654,7 +3649,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
36543649
PartPtrTy, BasePtr->getName() + "."),
36553650
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
36563651
LI->getName());
3657-
PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
3652+
PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
36583653

36593654
// Append this load onto the list of split loads so we can find it later
36603655
// to rewrite the stores.

‎llvm/lib/Transforms/Utils/Local.cpp

+15-9
Original file line numberDiff line numberDiff line change
@@ -1947,18 +1947,24 @@ void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
19471947
void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
19481948
MDNode *N, LoadInst &NewLI) {
19491949
auto *NewTy = NewLI.getType();
1950+
auto *OldTy = OldLI.getType();
19501951

1951-
// Give up unless it is converted to a pointer where there is a single very
1952-
// valuable mapping we can do reliably.
1953-
// FIXME: It would be nice to propagate this in more ways, but the type
1954-
// conversions make it hard.
1955-
if (!NewTy->isPointerTy())
1952+
if (DL.getTypeStoreSizeInBits(NewTy) == DL.getTypeSizeInBits(OldTy) &&
1953+
NewTy->isIntegerTy()) {
1954+
// An integer with the same number of bits - give it the range
1955+
// metadata!
1956+
NewLI.setMetadata(LLVMContext::MD_range, N);
19561957
return;
1958+
}
19571959

1958-
unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
1959-
if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
1960-
MDNode *NN = MDNode::get(OldLI.getContext(), None);
1961-
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
1960+
if (NewTy->isPointerTy()) {
1961+
// Try to convert the !range metadata to !nonnull metadata on the
1962+
// new pointer.
1963+
unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
1964+
if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
1965+
MDNode *NN = MDNode::get(OldLI.getContext(), None);
1966+
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
1967+
}
19621968
}
19631969
}
19641970

‎llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

+150-15
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "llvm/ADT/ArrayRef.h"
1919
#include "llvm/ADT/DenseMap.h"
20+
#include "llvm/ADT/Optional.h"
2021
#include "llvm/ADT/STLExtras.h"
2122
#include "llvm/ADT/SmallPtrSet.h"
2223
#include "llvm/ADT/SmallVector.h"
@@ -48,7 +49,7 @@
4849
#include "llvm/Transforms/Utils/Local.h"
4950
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
5051
#include <algorithm>
51-
#include <cassert>
52+
5253
#include <iterator>
5354
#include <utility>
5455
#include <vector>
@@ -177,6 +178,16 @@ class RenamePassData {
177178
ValVector Values;
178179
};
179180

181+
/// \brief Semi-open interval of instructions that are guaranteed to
182+
/// all execute if the first one does.
183+
class GuaranteedExecutionRange {
184+
public:
185+
unsigned Start;
186+
unsigned End;
187+
188+
GuaranteedExecutionRange(unsigned S, unsigned E): Start(S), End(E) {}
189+
};
190+
180191
/// \brief This assigns and keeps a per-bb relative ordering of load/store
181192
/// instructions in the block that directly load or store an alloca.
182193
///
@@ -190,14 +201,109 @@ class LargeBlockInfo {
190201
/// the block.
191202
DenseMap<const Instruction *, unsigned> InstNumbers;
192203

204+
/// \brief For each basic block we track, keep track of the intervals
205+
/// of instruction numbers of instructions that transfer control
206+
/// to their successors, for propagating metadata.
207+
DenseMap<const BasicBlock *, Optional<SmallVector<GuaranteedExecutionRange, 4>>>
208+
GuaranteedExecutionIntervals;
209+
193210
public:
194211

195-
/// This code only looks at accesses to allocas.
212+
/// This code looks for stores to allocas, and for loads both for
213+
/// allocas and for transferring metadata.
196214
static bool isInterestingInstruction(const Instruction *I) {
197-
return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
215+
return isa<LoadInst>(I) ||
198216
(isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
199217
}
200218

219+
/// Compute the GuaranteedExecutionIntervals for a given BB.
220+
///
221+
/// This is valid and remains valid as long as each interesting
222+
/// instruction (see isInterestingInstruction) that
223+
/// A) existed when this LBI was cleared
224+
/// B) has not been deleted (deleting interesting instructions is fine)
225+
/// are run in the same program executions and in the same order
226+
/// as when this LBI was cleared.
227+
///
228+
/// Because `PromoteMemoryToRegister` does not move memory loads at
229+
/// all, this assumption is satisfied in this pass.
230+
SmallVector<GuaranteedExecutionRange, 4> computeGEI(const BasicBlock *BB) {
231+
SmallVector<GuaranteedExecutionRange, 4> GuaranteedExecutionIntervals;
232+
233+
unsigned InstNo = 0;
234+
bool InRange = false;
235+
unsigned FirstInstInRange = 0;
236+
for (const Instruction &BBI : *BB) {
237+
if (isGuaranteedToTransferExecutionToSuccessor(&BBI)) {
238+
if (!InRange && isInterestingInstruction(&BBI)) {
239+
InRange = true;
240+
FirstInstInRange = InstNo;
241+
}
242+
} else {
243+
if (InRange) {
244+
assert(FirstInstInRange < InstNo && "Can't push an empty range here.");
245+
GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo);
246+
}
247+
InRange = false;
248+
}
249+
250+
if (isInterestingInstruction(&BBI)) {
251+
auto It = InstNumbers.find(&BBI);
252+
assert(It != InstNumbers.end() &&
253+
InstNo <= It->second &&
254+
"missing number for interesting instruction");
255+
InstNo = It->second + 1;
256+
}
257+
}
258+
259+
if (InRange) {
260+
assert(FirstInstInRange < InstNo && "Can't push an empty range here.");
261+
GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo);
262+
}
263+
264+
return GuaranteedExecutionIntervals;
265+
}
266+
267+
/// Return true if, when CxtI executes, it is guaranteed that either
268+
/// I had executed already or that I is guaranteed to be later executed.
269+
///
270+
/// The useful property this guarantees is that if I exhibits undefined
271+
/// behavior under some circumstances, then the whole program will exhibit
272+
/// undefined behavior at CxtI.
273+
bool isGuaranteedToBeExecuted(const Instruction *CxtI, const Instruction *I) {
274+
const BasicBlock *BB = CxtI->getParent();
275+
276+
if (BB != I->getParent()) {
277+
// Instructions in different basic blocks, so control flow
278+
// can diverge between them (we could track this with
279+
// postdoms, but we don't bother).
280+
return false;
281+
}
282+
283+
unsigned Index1 = getInstructionIndex(CxtI);
284+
unsigned Index2 = getInstructionIndex(I);
285+
286+
auto& BBGEI = GuaranteedExecutionIntervals[BB];
287+
if (!BBGEI.hasValue()) {
288+
BBGEI.emplace(computeGEI(BB));
289+
}
290+
291+
// We want to check whether I and CxtI are in the same range. To do that,
292+
// we notice that CxtI can only be in the first range R where
293+
// CxtI.end < R.end. If we find that range using binary search,
294+
// we can check whether I and CxtI are both in it.
295+
GuaranteedExecutionRange Bound(Index1, Index1);
296+
auto R = std::upper_bound(
297+
BBGEI->begin(), BBGEI->end(), Bound,
298+
[](GuaranteedExecutionRange I_, GuaranteedExecutionRange R) {
299+
return I_.End < R.End;
300+
});
301+
302+
return R != BBGEI->end() &&
303+
R->Start <= Index1 && Index1 < R->End &&
304+
R->Start <= Index2 && Index2 < R->End;
305+
}
306+
201307
/// Get or calculate the index of the specified instruction.
202308
unsigned getInstructionIndex(const Instruction *I) {
203309
assert(isInterestingInstruction(I) &&
@@ -213,9 +319,11 @@ class LargeBlockInfo {
213319
// avoid gratuitous rescans.
214320
const BasicBlock *BB = I->getParent();
215321
unsigned InstNo = 0;
322+
GuaranteedExecutionIntervals.erase(BB);
216323
for (const Instruction &BBI : *BB)
217324
if (isInterestingInstruction(&BBI))
218325
InstNumbers[&BBI] = InstNo++;
326+
219327
It = InstNumbers.find(I);
220328

221329
assert(It != InstNumbers.end() && "Didn't insert instruction?");
@@ -224,7 +332,10 @@ class LargeBlockInfo {
224332

225333
void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
226334

227-
void clear() { InstNumbers.clear(); }
335+
void clear() {
336+
InstNumbers.clear();
337+
GuaranteedExecutionIntervals.clear();
338+
}
228339
};
229340

230341
struct PromoteMem2Reg {
@@ -303,6 +414,7 @@ struct PromoteMem2Reg {
303414
SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
304415
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
305416
RenamePassData::ValVector &IncVals,
417+
LargeBlockInfo &LBI,
306418
std::vector<RenamePassData> &Worklist);
307419
bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
308420
};
@@ -321,6 +433,32 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
321433
AC->registerAssumption(CI);
322434
}
323435

436+
static void addAssumptionsFromMetadata(LoadInst *LI,
437+
Value *ReplVal,
438+
DominatorTree &DT,
439+
const DataLayout &DL,
440+
LargeBlockInfo &LBI,
441+
AssumptionCache *AC)
442+
{
443+
if (LI->getMetadata(LLVMContext::MD_nonnull) &&
444+
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) {
445+
addAssumeNonNull(AC, LI);
446+
}
447+
448+
if (auto *N = LI->getMetadata(LLVMContext::MD_range)) {
449+
// Range metadata is harder to use as an assumption,
450+
// so don't try to add one, but *do* try to copy
451+
// the metadata to a load in the same BB.
452+
if (LoadInst *NewLI = dyn_cast<LoadInst>(ReplVal)) {
453+
DEBUG(dbgs() << "trying to move !range metadata from" <<
454+
*LI << " to" << *NewLI << "\n");
455+
if (LBI.isGuaranteedToBeExecuted(LI, NewLI)) {
456+
copyRangeMetadata(DL, *LI, N, *NewLI);
457+
}
458+
}
459+
}
460+
}
461+
324462
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
325463
// Knowing that this alloca is promotable, we know that it's safe to kill all
326464
// instructions except for load and store.
@@ -409,9 +547,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
409547
// If the load was marked as nonnull we don't want to lose
410548
// that information when we erase this Load. So we preserve
411549
// it with an assume.
412-
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
413-
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
414-
addAssumeNonNull(AC, LI);
550+
addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC);
415551

416552
LI->replaceAllUsesWith(ReplVal);
417553
LI->eraseFromParent();
@@ -505,9 +641,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
505641
// Note, if the load was marked as nonnull we don't want to lose that
506642
// information when we erase it. So we preserve it with an assume.
507643
Value *ReplVal = std::prev(I)->second->getOperand(0);
508-
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
509-
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
510-
addAssumeNonNull(AC, LI);
644+
addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC);
511645

512646
LI->replaceAllUsesWith(ReplVal);
513647
}
@@ -643,7 +777,6 @@ void PromoteMem2Reg::run() {
643777

644778
if (Allocas.empty())
645779
return; // All of the allocas must have been trivial!
646-
647780
LBI.clear();
648781

649782
// Set the incoming values for the basic block to be null values for all of
@@ -661,9 +794,10 @@ void PromoteMem2Reg::run() {
661794
RenamePassData RPD = std::move(RenamePassWorkList.back());
662795
RenamePassWorkList.pop_back();
663796
// RenamePass may add new worklist entries.
664-
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
797+
RenamePass(RPD.BB, RPD.Pred, RPD.Values, LBI, RenamePassWorkList);
665798
} while (!RenamePassWorkList.empty());
666799

800+
LBI.clear();
667801
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
668802
Visited.clear();
669803

@@ -875,6 +1009,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
8751009
/// predecessor block Pred.
8761010
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
8771011
RenamePassData::ValVector &IncomingVals,
1012+
LargeBlockInfo &LBI,
8781013
std::vector<RenamePassData> &Worklist) {
8791014
NextIteration:
8801015
// If we are inserting any phi nodes into this BB, they will already be in the
@@ -941,13 +1076,12 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
9411076
// If the load was marked as nonnull we don't want to lose
9421077
// that information when we erase this Load. So we preserve
9431078
// it with an assume.
944-
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
945-
!isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
946-
addAssumeNonNull(AC, LI);
1079+
addAssumptionsFromMetadata(LI, V, DT, SQ.DL, LBI, AC);
9471080

9481081
// Anything using the load now uses the current value.
9491082
LI->replaceAllUsesWith(V);
9501083
BB->getInstList().erase(LI);
1084+
LBI.deleteValue(LI);
9511085
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
9521086
// Delete this instruction and mark the name as the current holder of the
9531087
// value
@@ -965,6 +1099,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
9651099
for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
9661100
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
9671101
BB->getInstList().erase(SI);
1102+
LBI.deleteValue(SI);
9681103
}
9691104
}
9701105

‎llvm/test/Transforms/SROA/preserve-nonnull.ll

+20
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
; Make sure that SROA doesn't lose nonnull metadata
44
; on loads from allocas that get optimized out.
55

6+
%pair = type { i64, [0 x i64], [1 x i64] }
7+
68
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
79

810
; Check that we do basic propagation of nonnull when rewriting.
@@ -42,6 +44,23 @@ entry:
4244
ret float* %ret
4345
}
4446

47+
; Make sure we propagate the !range attribute when we expand loads.
48+
define i64 @propagate_range(%pair* dereferenceable(16)) {
49+
; CHECK-LABEL: define i64 @propagate_range(
50+
; CHECK-NEXT: start:
51+
; CHECK-NEXT: %[[SROA_IDX:.*]] = getelementptr inbounds %pair
52+
; CHECK-NEXT: %[[RESULT:.*]] = load i64, i64* %[[SROA_IDX]], align 8, !range !1
53+
; CHECK: ret i64 %[[RESULT]]
54+
start:
55+
%a = alloca %pair
56+
%1 = bitcast %pair* %0 to i8*
57+
%2 = bitcast %pair* %a to i8*
58+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 16, i32 8, i1 false)
59+
%3 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0
60+
%4 = load i64, i64* %3, !range !1
61+
ret i64 %4
62+
}
63+
4564
; Make sure we properly handle the !nonnull attribute when we convert
4665
; a pointer load to an integer load.
4766
; FIXME: While this doesn't do anything actively harmful today, it really
@@ -90,3 +109,4 @@ entry:
90109
}
91110

92111
!0 = !{}
112+
!1 = !{i64 0, i64 2}

0 commit comments

Comments
 (0)
Please sign in to comment.