Index: lib/CodeGen/StackColoring.cpp =================================================================== --- lib/CodeGen/StackColoring.cpp +++ lib/CodeGen/StackColoring.cpp @@ -64,18 +64,180 @@ /// The user may write code that uses allocas outside of the declared lifetime /// zone. This can happen when the user returns a reference to a local /// data-structure. We can detect these cases and decide not to optimize the -/// code. If this flag is enabled, we try to save the user. +/// code. If this flag is enabled, we try to save the user. This option +/// is treated as overriding LifetimeStartOnFirstUse below. static cl::opt ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken")); +/// Enable enhanced dataflow scheme for lifetime analysis (treat first +/// use of stack slot as start of slot lifetime, as opposed to looking +/// for LIFETIME_START marker). See "Implementation notes" below for +/// more info. +static cl::opt +LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", + cl::init(true), cl::Hidden, + cl::desc("Treat stack lifetimes as starting on first use, not on START marker.")); + + STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +// +// Implementation Notes: +// --------------------- +// +// Consider the following motivating example: +// +// int foo() { +// char b1[1024], b2[1024]; +// if (...) { +// char b3[1024]; +// ; +// return x; +// } else { +// char b4[1024], b5[1024]; +// ; +// return y; +// } +// } +// +// In the code above, "b3" and "b4" are declared in distinct lexical +// scopes, meaning that it is easy to prove that they can share the +// same stack slot. Variables "b1" and "b2" are declared in the same +// scope, meaning that from a lexical point of view, their lifetimes +// overlap. From a control flow pointer of view, however, the two +// variables are accessed in disjoint regions of the CFG, thus it +// should be possible for them to share the same stack slot. An ideal +// stack allocation for the function above would look like: +// +// slot 0: b1, b2 +// slot 1: b3, b4 +// slot 2: b5 +// +// Achieving this allocation is tricky, however, due to the way +// lifetime markers are inserted. Here is a simplified view of the +// control flow graph for the code above: +// +// +------ block 0 -------+ +// 0| LIFETIME_START b1, b2 | +// 1| | +// +-----------------------+ +// ./ \. +// +------ block 1 -------+ +------ block 2 -------+ +// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 | +// 3| | 6| | +// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 | +// +-----------------------+ +-----------------------+ +// \. /. +// +------ block 3 -------+ +// 8| | +// 9| LIFETIME_END b1, b2 | +// 10| return | +// +-----------------------+ +// +// If we create live intervals for the variables above strictly based +// on the lifetime markers, we'll get the set of intervals on the +// left. If we ignore the lifetime start markers and instead treat a +// variable's lifetime as beginning with the first reference to the +// var, then we get the intervals on the right. +// +// LIFETIME_START First Use +// b1: [0,9] [3,4] [8,9] +// b2: [0,9] [6,9] +// b3: [2,4] [3,4] +// b4: [5,7] [6,7] +// b5: [5,7] [6,7] +// +// For the intervals on the left, the best we can do is overlap two +// variables (b3 and b4, for example); this gives us a stack size of +// 4*1024 bytes, not ideal. When treating first-use as the start of a +// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 +// byte stack (better). +// +// Relying entirely on first-use of stack slots is problematic, +// however, due to the fact that optimizations can sometimes migrate +// uses of a variable outside of its lifetime start/end region. Here +// is an example: +// +// int bar() { +// char b1[1024], b2[1024]; +// if (...) { +// +// return y; +// } else { +// +// while (...) { +// char b3[1024]; +// +// } +// } +// } +// +// Before optimization, the control flow graph for the code above +// might look like the following: +// +// +------ block 0 -------+ +// 0| LIFETIME_START b1, b2 | +// 1| | +// +-----------------------+ +// ./ \. +// +------ block 1 -------+ +------- block 2 -------+ +// 2| | 3| | +// +-----------------------+ +-----------------------+ +// | | +// | +------- block 3 -------+ <-\. +// | 4| | | +// | +-----------------------+ | +// | / | | +// | / +------- block 4 -------+ +// \ / 5| LIFETIME_START b3 | | +// \ / 6| | | +// \ / 7| LIFETIME_END b3 | | +// \ | +------------------------+ | +// \ | \ / +// +------ block 5 -----+ \--------------- +// 8| | +// 9| LIFETIME_END b1, b2 | +// 10| return | +// +---------------------+ +// +// During optimization, however, it can happen that an instruction +// computing an address in "b3" (for example, a loop-invariant GEP) is +// hoisted up out of the loop from block 4 to block 2. [Note that +// this is not an actual load from the stack, only an instruction that +// computes the address to be loaded]. If this happens, there is now a +// path leading from the first use of b3 to the return instruction +// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is +// now larger than if we were computing live intervals strictly based +// on lifetime markers. In the example above, this lengthened lifetime +// would mean that it would appear illegal to overlap b3 with b2. +// +// To deal with this such cases, the code in ::collectMarkers() below +// tries to identify "degenerate" slots -- those slots where on a single +// forward pass through the CFG we encounter a first reference to slot +// K before we hit the slot K lifetime start marker. For such slots, +// we fall back on using the lifetime start marker as the beginning of +// the variable's lifetime. NB: with this implementation, slots can +// appear degenerate in cases where there is unstructured control flow: +// +// if (q) goto mid; +// if (x > 9) { +// int b[100]; +// memcpy(&b[0], ...); +// mid: b[k] = ...; +// abc(&b); +// } +// +// If in RPO ordering chosen to walk the CFG we happen to visit the b[k] +// before visiting the memcpy block (which will contain the lifetime start +// for "b" then it will appear that 'b' has a degenerate lifetime. +// + //===----------------------------------------------------------------------===// // StackColoring Pass //===----------------------------------------------------------------------===// @@ -123,6 +285,17 @@ /// once the coloring is done. SmallVector Markers; + /// Record the FI slots for which we have seen some sort of + /// lifetime marker (either start or end). + BitVector InterestingSlots; + + /// Degenerate slots -- first use appears outside of start/end + /// lifetime markers. + BitVector DegenerateSlots; + + /// Number of iterations taken during data flow analysis. + unsigned NumIterations; + public: static char ID; StackColoring() : MachineFunctionPass(ID) { @@ -153,6 +326,25 @@ /// in and out blocks. void calculateLocalLiveness(); + /// Returns TRUE if we're using the first-use-begins-lifetime method for + /// this slot (if FALSE, then the start marker is treated as start of lifetime). + bool applyFirstUse(int Slot) { + if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas) + return false; + if (DegenerateSlots.test(Slot)) + return false; + return true; + } + + /// Examines the specified instruction and returns TRUE if the instruction + /// represents the start or end of an interesting lifetime. The slot or slots + /// starting or ending are added to the vector "slots" and "isStart" is set + /// accordingly. + /// \returns True if inst contains a lifetime start or end + bool isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector &slots, + bool &isStart); + /// Construct the LiveIntervals for the slots. void calculateLiveIntervals(unsigned NumSlots); @@ -170,7 +362,10 @@ /// Map entries which point to other entries to their destination. /// A->B->C becomes A->C. - void expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots); + void expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots); + + /// Used in collectMarkers + typedef DenseMap BlockBitVecMap; }; } // end anonymous namespace @@ -228,9 +423,140 @@ #endif // not NDEBUG -unsigned StackColoring::collectMarkers(unsigned NumSlot) { +static inline int getStartOrEndSlot(const MachineInstr &MI) +{ + assert((MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) && + "Expected LIFETIME_START or LIFETIME_END op"); + const MachineOperand &MO = MI.getOperand(0); + int Slot = MO.getIndex(); + if (Slot >= 0) + return Slot; + return -1; +} + +// +// At the moment the only way to end a variable lifetime is with +// a VARIABLE_LIFETIME op (which can't contain a start). If things +// change and the IR allows for a single inst that both begins +// and ends lifetime(s), this interface will need to be reworked. +// +bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector &slots, + bool &isStart) +{ + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + return false; + if (!InterestingSlots.test(Slot)) + return false; + slots.push_back(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_END) { + isStart = false; + return true; + } + if (! applyFirstUse(Slot)) { + isStart = true; + return true; + } + } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { + if (! MI.isDebugValue()) { + bool found = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot<0) + continue; + if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) { + slots.push_back(Slot); + found = true; + } + } + if (found) { + isStart = true; + return true; + } + } + } + return false; +} + +unsigned StackColoring::collectMarkers(unsigned NumSlot) +{ unsigned MarkersFound = 0; - // Scan the function to find all lifetime markers. + BlockBitVecMap SeenStartMap; + InterestingSlots.clear(); + InterestingSlots.resize(NumSlot); + DegenerateSlots.clear(); + DegenerateSlots.resize(NumSlot); + + // Step 1: collect markers and populate the "InterestingSlots" + // and "DegenerateSlots" sets. + for (MachineBasicBlock *MBB : depth_first(MF)) { + + // Compute the set of slots for which we've seen a START marker but have + // not yet seen an END marker at this point in the walk (e.g. on entry + // to this bb). + BitVector BetweenStartEnd; + BetweenStartEnd.resize(NumSlot); + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI); + if (I != SeenStartMap.end()) { + BetweenStartEnd |= I->second; + } + } + + // Walk the instructions in the block to look for start/end ops. + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + continue; + InterestingSlots.set(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_START) + BetweenStartEnd.set(Slot); + else + BetweenStartEnd.reset(Slot); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << "Found a lifetime "); + DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START + ? "start" + : "end")); + DEBUG(dbgs() << " marker for slot #" << Slot); + DEBUG(dbgs() << " with allocation: " << Allocation->getName() + << "\n"); + } + Markers.push_back(&MI); + MarkersFound += 1; + } else { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot < 0) + continue; + if (! BetweenStartEnd.test(Slot)) { + DegenerateSlots.set(Slot); + } + } + } + } + BitVector &SeenStart = SeenStartMap[MBB]; + SeenStart |= BetweenStartEnd; + } + if (!MarkersFound) { + return 0; + } + DEBUG(dumpBV("Degenerate slots", DegenerateSlots)); + + // Step 2: compute begin/end sets for each block + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. @@ -246,37 +572,33 @@ BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); + SmallVector slots; for (MachineInstr &MI : *MBB) { - if (MI.getOpcode() != TargetOpcode::LIFETIME_START && - MI.getOpcode() != TargetOpcode::LIFETIME_END) - continue; - - bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &MO = MI.getOperand(0); - int Slot = MO.getIndex(); - if (Slot < 0) - continue; - - Markers.push_back(&MI); - - MarkersFound++; - - const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); - if (Allocation) { - DEBUG(dbgs()<<"Found a lifetime marker for slot #"<getName()<<"\n"); - } - - if (IsStart) { - BlockInfo.Begin.set(Slot); - } else { - if (BlockInfo.Begin.test(Slot)) { - // Allocas that start and end within a single block are handled - // specially when computing the LiveIntervals to avoid pessimizing - // the liveness propagation. - BlockInfo.Begin.reset(Slot); - } else { + bool isStart = false; + slots.clear(); + if (isLifetimeStartOrEnd(MI, slots, isStart)) { + if (!isStart) { + assert(slots.size() == 1 && "unexpected: MI ends multiple slots"); + int Slot = slots[0]; + if (BlockInfo.Begin.test(Slot)) { + BlockInfo.Begin.reset(Slot); + } BlockInfo.End.set(Slot); + } else { + for (auto Slot : slots) { + DEBUG(dbgs() << "Found a use of slot #" << Slot); + DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index "); + DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << " with allocation: "<< Allocation->getName()); + } + DEBUG(dbgs() << "\n"); + if (BlockInfo.End.test(Slot)) { + BlockInfo.End.reset(Slot); + } + BlockInfo.Begin.set(Slot); + } } } } @@ -287,90 +609,56 @@ return MarkersFound; } -void StackColoring::calculateLocalLiveness() { - // Perform a standard reverse dataflow computation to solve for - // global liveness. The BEGIN set here is equivalent to KILL in the standard - // formulation, and END is equivalent to GEN. The result of this computation - // is a map from blocks to bitvectors where the bitvectors represent which - // allocas are live in/out of that block. - SmallPtrSet BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); - unsigned NumSSMIters = 0; +void StackColoring::calculateLocalLiveness() +{ + unsigned NumIters = 0; bool changed = true; while (changed) { changed = false; - ++NumSSMIters; - - SmallPtrSet NextBBSet; + ++NumIters; for (const MachineBasicBlock *BB : BasicBlockNumbering) { - if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups. LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); BlockLifetimeInfo &BlockInfo = BI->second; + // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - BitVector LocalLiveOut; - - // Forward propagation from begins to ends. for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) { LivenessMap::const_iterator I = BlockLiveness.find(*PI); assert(I != BlockLiveness.end() && "Predecessor not found"); LocalLiveIn |= I->second.LiveOut; } - LocalLiveIn |= BlockInfo.End; - LocalLiveIn.reset(BlockInfo.Begin); - - // Reverse propagation from ends to begins. - for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - LivenessMap::const_iterator I = BlockLiveness.find(*SI); - assert(I != BlockLiveness.end() && "Successor not found"); - LocalLiveOut |= I->second.LiveIn; - } - LocalLiveOut |= BlockInfo.Begin; - LocalLiveOut.reset(BlockInfo.End); - - LocalLiveIn |= LocalLiveOut; - LocalLiveOut |= LocalLiveIn; - // After adopting the live bits, we need to turn-off the bits which - // are de-activated in this block. + // Compute LiveOut by subtracting out lifetimes that end in this + // block, then adding in lifetimes that begin in this block. If + // we have both BEGIN and END markers in the same basic block + // then we know that the BEGIN marker comes after the END, + // because we already handle the case where the BEGIN comes + // before the END when collecting the markers (and building the + // BEGIN/END vectors). + BitVector LocalLiveOut = LocalLiveIn; LocalLiveOut.reset(BlockInfo.End); - LocalLiveIn.reset(BlockInfo.Begin); - - // If we have both BEGIN and END markers in the same basic block then - // we know that the BEGIN marker comes after the END, because we already - // handle the case where the BEGIN comes before the END when collecting - // the markers (and building the BEGIN/END vectore). - // Want to enable the LIVE_IN and LIVE_OUT of slots that have both - // BEGIN and END because it means that the value lives before and after - // this basic block. - BitVector LocalEndBegin = BlockInfo.End; - LocalEndBegin &= BlockInfo.Begin; - LocalLiveIn |= LocalEndBegin; - LocalLiveOut |= LocalEndBegin; + LocalLiveOut |= BlockInfo.Begin; + // Update block LiveIn set, noting whether it has changed. if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - - NextBBSet.insert(BB->pred_begin(), BB->pred_end()); } + // Update block LiveOut set, noting whether it has changed. if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - - NextBBSet.insert(BB->succ_begin(), BB->succ_end()); } } - - BBSet = std::move(NextBBSet); }// while changed. + + NumIterations = NumIters; } void StackColoring::calculateLiveIntervals(unsigned NumSlots) { @@ -385,29 +673,22 @@ Finishes.clear(); Finishes.resize(NumSlots); - // Create the interval for the basic blocks with lifetime markers in them. - for (const MachineInstr *MI : Markers) { - if (MI->getParent() != &MBB) - continue; - - assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || - MI->getOpcode() == TargetOpcode::LIFETIME_END) && - "Invalid Lifetime marker"); + // Create the interval for the basic blocks containing lifetime begin/end. + for (const MachineInstr &MI : MBB) { - bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &Mo = MI->getOperand(0); - int Slot = Mo.getIndex(); - if (Slot < 0) + SmallVector slots; + bool IsStart = false; + if (!isLifetimeStartOrEnd(MI, slots, IsStart)) continue; - - SlotIndex ThisIndex = Indexes->getInstructionIndex(*MI); - - if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) - Starts[Slot] = ThisIndex; - } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + for (auto Slot : slots) { + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } } } @@ -423,7 +704,29 @@ } for (unsigned i = 0; i < NumSlots; ++i) { - assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + // + // When LifetimeStartOnFirstUse is turned on, data flow analysis + // is forward (from starts to ends), not bidirectional. A + // consequence of this is that we can wind up in situations + // where Starts[i] is invalid but Finishes[i] is valid and vice + // versa. Example: + // + // LIFETIME_START x + // if (...) { + // + // throw ...; + // } + // LIFETIME_END x + // return 2; + // + // + // Here the slot for "x" will not be live into the block + // containing the "return 2" (since lifetimes start with first + // use, not at the dominating LIFETIME_START marker). + // + if (Starts[i].isValid() && !Finishes[i].isValid()) { + Finishes[i] = Indexes->getMBBEndIdx(&MBB); + } if (!Starts[i].isValid()) continue; @@ -684,7 +987,6 @@ return false; SmallVector SortedSlots; - SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); @@ -717,9 +1019,12 @@ // Calculate the liveness of each block. calculateLocalLiveness(); + DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); + DEBUG(dump()); // Propagate the liveness information. calculateLiveIntervals(NumSlots); + DEBUG(dumpIntervals()); // Search for allocas which are used outside of the declared lifetime // markers. Index: test/CodeGen/X86/StackColoring.ll =================================================================== --- test/CodeGen/X86/StackColoring.ll +++ test/CodeGen/X86/StackColoring.ll @@ -87,7 +87,7 @@ } ;CHECK-LABEL: myCall_w4: -;YESCOLOR: subq $200, %rsp +;YESCOLOR: subq $120, %rsp ;NOCOLOR: subq $408, %rsp define i32 @myCall_w4(i32 %in) { @@ -217,7 +217,7 @@ ;CHECK-LABEL: myCall2_nostart: -;YESCOLOR: subq $144, %rsp +;YESCOLOR: subq $272, %rsp ;NOCOLOR: subq $272, %rsp define i32 @myCall2_nostart(i32 %in, i1 %d) { entry: @@ -425,6 +425,120 @@ ret i32 9 } +; In this case 'itar1' and 'itar2' can't be overlapped if we treat +; lifetime.start as the beginning of the lifetime, but we can +; overlap if we consider first use of the slot as lifetime +; start. See llvm bug 25776. + +;CHECK-LABEL: ifthen_twoslots: +;YESCOLOR: subq $536, %rsp +;NOCOLOR: subq $1048, %rsp + +define i32 @ifthen_twoslots(i32 %x) #0 { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %itar1 = alloca [128 x i32], align 16 + %itar2 = alloca [128 x i32], align 16 + %cleanup.dest.slot = alloca i32 + store i32 %x, i32* %x.addr, align 4 + %itar1_start_8 = bitcast [128 x i32]* %itar1 to i8* + call void @llvm.lifetime.start(i64 512, i8* %itar1_start_8) #3 + %itar2_start_8 = bitcast [128 x i32]* %itar2 to i8* + call void @llvm.lifetime.start(i64 512, i8* %itar2_start_8) #3 + %xval = load i32, i32* %x.addr, align 4 + %and = and i32 %xval, 1 + %tobool = icmp ne i32 %and, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + %arraydecay = getelementptr inbounds [128 x i32], [128 x i32]* %itar1, i32 0, i32 0 + call void @inita(i32* %arraydecay) + store i32 1, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + %itar2_end_8 = bitcast [128 x i32]* %itar2 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar2_end_8) #3 + %itar1_end_8 = bitcast [128 x i32]* %itar1 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar1_end_8) #3 + br label %cleanup + +if.else: ; preds = %entry + %arraydecay1 = getelementptr inbounds [128 x i32], [128 x i32]* %itar2, i32 0, i32 0 + call void @inita(i32* %arraydecay1) + store i32 0, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + %itar2_end2_8 = bitcast [128 x i32]* %itar2 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar2_end2_8) #3 + %itar1_end2_8 = bitcast [128 x i32]* %itar1 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar1_end2_8) #3 + br label %cleanup + +cleanup: ; preds = %if.else, %if.then + %final_retval = load i32, + i32* %retval, align 4 + ret i32 %final_retval +} + +; This function is intended to test the case where you +; have a reference to a stack slot that lies outside of +; the START/END lifetime markers-- the flow analysis +; should catch this and build the lifetime based on the +; markers only. + +;CHECK-LABEL: while_loop: +;YESCOLOR: subq $1032, %rsp +;NOCOLOR: subq $1544, %rsp + +define i32 @while_loop(i32 %x) #0 { +entry: + %b1 = alloca [128 x i32], align 16 + %b2 = alloca [128 x i32], align 16 + %b3 = alloca [128 x i32], align 16 + %tmp = bitcast [128 x i32]* %b1 to i8* + call void @llvm.lifetime.start(i64 512, i8* %tmp) #3 + %tmp1 = bitcast [128 x i32]* %b2 to i8* + call void @llvm.lifetime.start(i64 512, i8* %tmp1) #3 + %and = and i32 %x, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %arraydecay = getelementptr inbounds [128 x i32], [128 x i32]* %b2, i64 0, i64 0 + call void @inita(i32* %arraydecay) #3 + br label %if.end + +if.else: ; preds = %entry + %arraydecay1 = getelementptr inbounds [128 x i32], [128 x i32]* %b1, i64 0, i64 0 + call void @inita(i32* %arraydecay1) #3 + %arraydecay3 = getelementptr inbounds [128 x i32], [128 x i32]* %b3, i64 0, i64 0 + call void @inita(i32* %arraydecay3) #3 + %tobool25 = icmp eq i32 %x, 0 + br i1 %tobool25, label %if.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %if.else + %tmp2 = bitcast [128 x i32]* %b3 to i8* + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %x.addr.06 = phi i32 [ %x, %while.body.lr.ph ], [ %dec, %while.body ] + %dec = add nsw i32 %x.addr.06, -1 + call void @llvm.lifetime.start(i64 512, i8* %tmp2) #3 + call void @inita(i32* %arraydecay3) #3 + call void @llvm.lifetime.end(i64 512, i8* %tmp2) #3 + %tobool2 = icmp eq i32 %dec, 0 + br i1 %tobool2, label %if.end.loopexit, label %while.body + +if.end.loopexit: ; preds = %while.body + br label %if.end + +if.end: ; preds = %if.end.loopexit, %if.else, %if.then + call void @llvm.lifetime.end(i64 512, i8* %tmp1) #3 + call void @llvm.lifetime.end(i64 512, i8* %tmp) #3 + ret i32 0 +} + +declare void @inita(i32*) #2 + declare void @bar([100 x i32]* , [100 x i32]*) nounwind declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind Index: test/CodeGen/X86/misched-aa-colored.ll =================================================================== --- test/CodeGen/X86/misched-aa-colored.ll +++ test/CodeGen/X86/misched-aa-colored.ll @@ -155,6 +155,7 @@ %ref.tmp.i = alloca %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", align 8 %Op.i = alloca %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", align 8 %0 = bitcast %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i to i8* + call void @llvm.lifetime.start(i64 24, i8* %0) #1 %retval.sroa.0.0.idx.i36 = getelementptr inbounds %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i, i64 0, i32 1, i32 0, i32 0 %retval.sroa.0.0.copyload.i37 = load i32, i32* %retval.sroa.0.0.idx.i36, align 8 call void @llvm.lifetime.end(i64 24, i8* %0) #1