Index: lib/CodeGen/StackColoring.cpp =================================================================== --- lib/CodeGen/StackColoring.cpp +++ lib/CodeGen/StackColoring.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -68,13 +67,23 @@ /// The user may write code that uses allocas outside of the declared lifetime /// zone. This can happen when the user returns a reference to a local /// data-structure. We can detect these cases and decide not to optimize the -/// code. If this flag is enabled, we try to save the user. +/// code. If this flag is enabled, we try to save the user. This option +/// is treated as overriding LifetimeStartOnFirstUse below. static cl::opt ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken")); +/// Enable enhanced dataflow scheme for lifetime analysis (treat first +/// use of stack slot as start of slot lifetime, as opposed to looking +/// for LIFETIME_START marker). +static cl::opt +LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", + cl::init(true), cl::Hidden, + cl::desc("Treat stack lifetimes as starting on first use, not on START marker.")); + + STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); @@ -127,6 +136,13 @@ /// once the coloring is done. SmallVector Markers; + /// Record the FI slots for which we have seen some sort of + /// lifetime marker (either start or end). + BitVector InterestingSlots; + + /// Number of iterations taken during data flow analysis. + unsigned NumIterations; + public: static char ID; StackColoring() : MachineFunctionPass(ID) { @@ -154,6 +170,15 @@ /// in and out blocks. void calculateLocalLiveness(); + /// Examines the specified instruction and returns TRUE if the instruction + /// represents the start or end of an interesting lifetime. The slot or slots + /// starting or ending are added to the vector "slots" and "isStart" is set + /// accordingly. + /// \returns True if inst contains a lifetime start or end + bool isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector &slots, + bool &isStart); + /// Construct the LiveIntervals for the slots. void calculateLiveIntervals(unsigned NumSlots); @@ -171,7 +196,7 @@ /// Map entries which point to other entries to their destination. /// A->B->C becomes A->C. - void expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots); + void expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots); }; } // end anonymous namespace @@ -180,15 +205,12 @@ INITIALIZE_PASS_BEGIN(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); @@ -226,9 +248,100 @@ } } -unsigned StackColoring::collectMarkers(unsigned NumSlot) { +static inline int getStartOrEndSlot(const MachineInstr &MI) +{ + assert((MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) && + "Expected LIFETIME_START or LIFETIME_END op"); + const MachineOperand &MO = MI.getOperand(0); + int Slot = MO.getIndex(); + if (Slot >= 0) + return Slot; + return -1; +} + +// +// At the moment the only way to end a variable lifetime is with +// a VARIABLE_LIFETIME op (which can't contain a start). If things +// change and the IR allows for a single inst that both begins +// and ends lifetime(s), this interface will need to be reworked. +// +bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector &slots, + bool &isStart) +{ + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + return false; + if (!InterestingSlots.test(Slot)) + return false; + slots.push_back(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_END) { + isStart = false; + return true; + } + if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas) { + isStart = true; + return true; + } + } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { + if (! MI.isDebugValue()) { + bool found = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot<0) + continue; + if (InterestingSlots.test(Slot)) { + slots.push_back(Slot); + found = true; + } + } + if (found) { + isStart = true; + return true; + } + } + } + return false; +} + +unsigned StackColoring::collectMarkers(unsigned NumSlot) +{ unsigned MarkersFound = 0; - // Scan the function to find all lifetime markers. + + // Step 1: collect markers and populate "InterestingSlots" set + InterestingSlots.clear(); + InterestingSlots.resize(NumSlot); + for (MachineBasicBlock *MBB : depth_first(MF)) { + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + continue; + InterestingSlots.set(Slot); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs()<<"Found a lifetime "); + DEBUG(dbgs()<< (MI.getOpcode() == TargetOpcode::LIFETIME_START ? "start" : "end")); + DEBUG(dbgs()<<" marker for slot #"<getName()<<"\n"); + } + Markers.push_back(&MI); + MarkersFound += 1; + } + } + } + if (!MarkersFound) { + return 0; + } + + // Step 2: compute begin/end sets for each block + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. @@ -244,37 +357,36 @@ BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); + SmallVector slots; for (MachineInstr &MI : *MBB) { - if (MI.getOpcode() != TargetOpcode::LIFETIME_START && - MI.getOpcode() != TargetOpcode::LIFETIME_END) - continue; - - bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &MO = MI.getOperand(0); - int Slot = MO.getIndex(); - if (Slot < 0) - continue; - - Markers.push_back(&MI); - - MarkersFound++; - - const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); - if (Allocation) { - DEBUG(dbgs()<<"Found a lifetime marker for slot #"<getName()<<"\n"); - } - - if (IsStart) { - BlockInfo.Begin.set(Slot); - } else { - if (BlockInfo.Begin.test(Slot)) { - // Allocas that start and end within a single block are handled - // specially when computing the LiveIntervals to avoid pessimizing - // the liveness propagation. - BlockInfo.Begin.reset(Slot); + bool isStart = false; + slots.clear(); + if (isLifetimeStartOrEnd(MI, slots, isStart)) { + if (!isStart) { + assert(slots.size() == 1); + int Slot = slots[0]; + if (BlockInfo.Begin.test(Slot)) { + // Allocas that start and end within a single block are handled + // specially when computing the LiveIntervals to avoid pessimizing + // the liveness propagation. + BlockInfo.Begin.reset(Slot); + } else { + BlockInfo.End.set(Slot); + } } else { - BlockInfo.End.set(Slot); + for (auto Slot : slots) { + if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { + DEBUG(dbgs()<<"Found a use of slot #"<getNumber() << " index "); + DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << " with allocation: "<< Allocation->getName()); + } + DEBUG(dbgs() << "\n"); + } + BlockInfo.Begin.set(Slot); + } } } } @@ -285,90 +397,56 @@ return MarkersFound; } -void StackColoring::calculateLocalLiveness() { - // Perform a standard reverse dataflow computation to solve for - // global liveness. The BEGIN set here is equivalent to KILL in the standard - // formulation, and END is equivalent to GEN. The result of this computation - // is a map from blocks to bitvectors where the bitvectors represent which - // allocas are live in/out of that block. - SmallPtrSet BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); - unsigned NumSSMIters = 0; +void StackColoring::calculateLocalLiveness() +{ + unsigned NumIters = 0; bool changed = true; while (changed) { changed = false; - ++NumSSMIters; - - SmallPtrSet NextBBSet; + ++NumIters; for (const MachineBasicBlock *BB : BasicBlockNumbering) { - if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups. LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); BlockLifetimeInfo &BlockInfo = BI->second; + // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - BitVector LocalLiveOut; - - // Forward propagation from begins to ends. for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) { LivenessMap::const_iterator I = BlockLiveness.find(*PI); assert(I != BlockLiveness.end() && "Predecessor not found"); LocalLiveIn |= I->second.LiveOut; } - LocalLiveIn |= BlockInfo.End; - LocalLiveIn.reset(BlockInfo.Begin); - - // Reverse propagation from ends to begins. - for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - LivenessMap::const_iterator I = BlockLiveness.find(*SI); - assert(I != BlockLiveness.end() && "Successor not found"); - LocalLiveOut |= I->second.LiveIn; - } - LocalLiveOut |= BlockInfo.Begin; - LocalLiveOut.reset(BlockInfo.End); - LocalLiveIn |= LocalLiveOut; - LocalLiveOut |= LocalLiveIn; - - // After adopting the live bits, we need to turn-off the bits which - // are de-activated in this block. + // Compute LiveOut by subtracting out lifetimes that end in this + // block, then adding in lifetimes that begin in this block. If + // we have both BEGIN and END markers in the same basic block + // then we know that the BEGIN marker comes after the END, + // because we already handle the case where the BEGIN comes + // before the END when collecting the markers (and building the + // BEGIN/END vectors). + BitVector LocalLiveOut = LocalLiveIn; LocalLiveOut.reset(BlockInfo.End); - LocalLiveIn.reset(BlockInfo.Begin); - - // If we have both BEGIN and END markers in the same basic block then - // we know that the BEGIN marker comes after the END, because we already - // handle the case where the BEGIN comes before the END when collecting - // the markers (and building the BEGIN/END vectore). - // Want to enable the LIVE_IN and LIVE_OUT of slots that have both - // BEGIN and END because it means that the value lives before and after - // this basic block. - BitVector LocalEndBegin = BlockInfo.End; - LocalEndBegin &= BlockInfo.Begin; - LocalLiveIn |= LocalEndBegin; - LocalLiveOut |= LocalEndBegin; + LocalLiveOut |= BlockInfo.Begin; + // Update block LiveIn set, noting whether it has changed. if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - - NextBBSet.insert(BB->pred_begin(), BB->pred_end()); } + // Update block LiveOut set, noting whether it has changed. if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - - NextBBSet.insert(BB->succ_begin(), BB->succ_end()); } } - - BBSet = std::move(NextBBSet); }// while changed. + + NumIterations = NumIters; } void StackColoring::calculateLiveIntervals(unsigned NumSlots) { @@ -383,29 +461,22 @@ Finishes.clear(); Finishes.resize(NumSlots); - // Create the interval for the basic blocks with lifetime markers in them. - for (const MachineInstr *MI : Markers) { - if (MI->getParent() != &MBB) - continue; - - assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || - MI->getOpcode() == TargetOpcode::LIFETIME_END) && - "Invalid Lifetime marker"); + // Create the interval for the basic blocks containing lifetime begin/end. + for (const MachineInstr &MI : MBB) { - bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &Mo = MI->getOperand(0); - int Slot = Mo.getIndex(); - if (Slot < 0) + SmallVector slots; + bool IsStart = false; + if (!isLifetimeStartOrEnd(MI, slots, IsStart)) continue; - - SlotIndex ThisIndex = Indexes->getInstructionIndex(*MI); - - if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) - Starts[Slot] = ThisIndex; - } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + for (auto Slot : slots) { + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } } } @@ -421,6 +492,34 @@ } for (unsigned i = 0; i < NumSlots; ++i) { + // + // When LifetimeStartOnFirstUse is turned on, data flow analysis + // is forward (from starts to ends), not bidirectional. A + // consequence of this is that we can wind up in situations + // where Starts[i] is invalid but Finishes[i] is valid. Example: + // + // LIFETIME_START x + // if (...) { + // + // throw ...; + // } else { + // LIFETIME_END x + // return 2; + // } + // + // Here the slot for "x" will not be live into the block + // containing the "return 2" (since lifetimes start with first + // use, not at the dominating LIFETIME_START marker). This is + // not an indicator of invalid or insane IR, it's simple a + // case we need to allow for. + // + if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas && + !Starts[i].isValid() && Finishes[i].isValid()) { + if (i >= MBBLiveness.LiveIn.size() || !MBBLiveness.LiveIn.test(i)) { + continue; + } + } + assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); if (!Starts[i].isValid()) continue; @@ -682,7 +781,6 @@ return false; SmallVector SortedSlots; - SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); @@ -715,9 +813,12 @@ // Calculate the liveness of each block. calculateLocalLiveness(); + DEBUG(dbgs()<< "Dataflow iterations: " << NumIterations << "\n"); + DEBUG(dump()); // Propagate the liveness information. calculateLiveIntervals(NumSlots); + // DEBUG(dumpIntervals()); // Search for allocas which are used outside of the declared lifetime // markers. Index: test/CodeGen/X86/StackColoring.ll =================================================================== --- test/CodeGen/X86/StackColoring.ll +++ test/CodeGen/X86/StackColoring.ll @@ -87,7 +87,7 @@ } ;CHECK-LABEL: myCall_w4: -;YESCOLOR: subq $200, %rsp +;YESCOLOR: subq $120, %rsp ;NOCOLOR: subq $408, %rsp define i32 @myCall_w4(i32 %in) { @@ -330,7 +330,7 @@ call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind ret void } -;YESCOLOR: subq $272, %rsp +;YESCOLOR: subq $144, %rsp ;NOCOLOR: subq $272, %rsp define i32 @myCall_end_before_begin(i32 %in, i1 %d) { @@ -425,6 +425,61 @@ ret i32 9 } +; In this case 'itar1' and 'itar2' can't be overlapped if we treat +; lifetime.start as the beginning of the lifetime, but we can +; overlap if we consider first use of the slot as lifetime +; start. See llvm bug 25776. + +;CHECK-LABEL: ifthen_twoslots: +;YESCOLOR: subq $536, %rsp +;NOCOLOR: subq $1048, %rsp + +define i32 @ifthen_twoslots(i32 %x) #0 { +entry: + %retval = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %itar1 = alloca [128 x i32], align 16 + %itar2 = alloca [128 x i32], align 16 + %cleanup.dest.slot = alloca i32 + store i32 %x, i32* %x.addr, align 4 + %itar1_start_8 = bitcast [128 x i32]* %itar1 to i8* + call void @llvm.lifetime.start(i64 512, i8* %itar1_start_8) #3 + %itar2_start_8 = bitcast [128 x i32]* %itar2 to i8* + call void @llvm.lifetime.start(i64 512, i8* %itar2_start_8) #3 + %xval = load i32, i32* %x.addr, align 4 + %and = and i32 %xval, 1 + %tobool = icmp ne i32 %and, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + %arraydecay = getelementptr inbounds [128 x i32], [128 x i32]* %itar1, i32 0, i32 0 + call void @inita(i32* %arraydecay) + store i32 1, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + %itar2_end_8 = bitcast [128 x i32]* %itar2 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar2_end_8) #3 + %itar1_end_8 = bitcast [128 x i32]* %itar1 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar1_end_8) #3 + br label %cleanup + +if.else: ; preds = %entry + %arraydecay1 = getelementptr inbounds [128 x i32], [128 x i32]* %itar2, i32 0, i32 0 + call void @inita(i32* %arraydecay1) + store i32 0, i32* %retval, align 4 + store i32 1, i32* %cleanup.dest.slot, align 4 + %itar2_end2_8 = bitcast [128 x i32]* %itar2 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar2_end2_8) #3 + %itar1_end2_8 = bitcast [128 x i32]* %itar1 to i8* + call void @llvm.lifetime.end(i64 512, i8* %itar1_end2_8) #3 + br label %cleanup + +cleanup: ; preds = %if.else, %if.then + %final_retval = load i32, i32* %retval, align 4 + ret i32 %final_retval +} + +declare void @inita(i32*) #2 + declare void @bar([100 x i32]* , [100 x i32]*) nounwind declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind Index: test/CodeGen/X86/misched-aa-colored.ll =================================================================== --- test/CodeGen/X86/misched-aa-colored.ll +++ test/CodeGen/X86/misched-aa-colored.ll @@ -155,6 +155,7 @@ %ref.tmp.i = alloca %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", align 8 %Op.i = alloca %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", align 8 %0 = bitcast %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i to i8* + call void @llvm.lifetime.start(i64 24, i8* %0) #1 %retval.sroa.0.0.idx.i36 = getelementptr inbounds %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i, i64 0, i32 1, i32 0, i32 0 %retval.sroa.0.0.copyload.i37 = load i32, i32* %retval.sroa.0.0.idx.i36, align 8 call void @llvm.lifetime.end(i64 24, i8* %0) #1