Index: llvm/include/llvm/CodeGen/LivePhysRegs.h =================================================================== --- llvm/include/llvm/CodeGen/LivePhysRegs.h +++ llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -31,6 +31,7 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include @@ -144,6 +145,14 @@ /// \brief Dumps the currently live registers to the debug output. void dump() const; + /// \brief Clears and computes live-out registers of @p MBB. This function + /// relies on kill marks for accuracy. + void computeLiveOuts(const TargetRegisterInfo &TRI, + const MachineBasicBlock &MBB); + + /// \brief Removes sub-registers that are contained by live super registers. + void removeRedundantSubRegs(); + private: /// Adds live-in registers from basic block @p MBB, taking associated /// lane masks into consideration. Index: llvm/lib/CodeGen/BranchFolding.h =================================================================== --- llvm/lib/CodeGen/BranchFolding.h +++ llvm/lib/CodeGen/BranchFolding.h @@ -141,6 +141,7 @@ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB, unsigned MinCommonTailLength); + void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); /// Delete the instruction OldInst and everything after it, replacing it @@ -201,6 +202,9 @@ /// If the successors of MBB has common instruction sequence at the start of /// the function, move the instructions before MBB terminator if it's legal. bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); + + // Propagates live-out callee-saved registers to To. 
+ void propagateLiveOutCSRs(MachineBasicBlock &From, MachineBasicBlock &To); }; } Index: llvm/lib/CodeGen/BranchFolding.cpp =================================================================== --- llvm/lib/CodeGen/BranchFolding.cpp +++ llvm/lib/CodeGen/BranchFolding.cpp @@ -345,14 +345,32 @@ return TailLen; } +void BranchFolder::propagateLiveOutCSRs(MachineBasicBlock &From, + MachineBasicBlock &To) { + const auto &MFI = From.getParent()->getFrameInfo(); + const bool HasCSI = MFI.isCalleeSavedInfoValid(); + const auto IsCalleeSavedReg = [&MFI](unsigned Reg) { + const auto &CSI = MFI.getCalleeSavedInfo(); + return std::find_if(CSI.begin(), CSI.end(), + [Reg](const CalleeSavedInfo &Info) { + return Info.getReg() == Reg; + }) != CSI.end(); + }; + LiveRegs.computeLiveOuts(*TRI, From); + for (unsigned Reg : LiveRegs) { + if (!HasCSI || IsCalleeSavedReg(Reg)) + To.addLiveIn(Reg); + } +} + void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { + MachineBasicBlock *CurMBB = OldInst->getParent(); TII->ReplaceTailWithBranchTo(OldInst, NewDest); - if (UpdateLiveIns) { - NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *TRI, *NewDest); - } + if (UpdateLiveIns) + if (NewDest->isReturnBlock()) + propagateLiveOutCSRs(*CurMBB, *NewDest); ++NumTailMerge; } @@ -387,8 +405,11 @@ // NewMBB inherits CurMBB's block frequency. MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); - if (UpdateLiveIns) + if (UpdateLiveIns) { computeLiveIns(LiveRegs, *TRI, *NewMBB); + if (NewMBB->isReturnBlock()) + propagateLiveOutCSRs(CurMBB, *NewMBB); + } // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -954,6 +975,7 @@ // BB i is no longer a predecessor of SuccBB; remove it from the worklist. 
MergePotentials.erase(SameTails[i].getMPIter()); } + MBB->sortUniqueLiveIns(); DEBUG(dbgs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks // that match it with a smaller number of instructions. Index: llvm/lib/CodeGen/LivePhysRegs.cpp =================================================================== --- llvm/lib/CodeGen/LivePhysRegs.cpp +++ llvm/lib/CodeGen/LivePhysRegs.cpp @@ -207,18 +207,35 @@ LiveRegs.addLiveOutsNoPristines(MBB); for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) LiveRegs.stepBackward(MI); + LiveRegs.removeRedundantSubRegs(); + for (unsigned Reg : LiveRegs) + MBB.addLiveIn(Reg); +} - for (unsigned Reg : LiveRegs) { +void LivePhysRegs::computeLiveOuts(const TargetRegisterInfo &TRI, + const MachineBasicBlock &MBB) { + init(TRI); + addBlockLiveIns(MBB); + SmallVector<std::pair<unsigned, const MachineOperand*>, 16> Clobbers; + for (const MachineInstr &MI : make_range(MBB.begin(), MBB.end())) + stepForward(MI, Clobbers); + removeRedundantSubRegs(); +} + +void LivePhysRegs::removeRedundantSubRegs() { + assert(TRI); + for (auto I = LiveRegs.begin(); I != LiveRegs.end();) { // Skip the register if we are about to add one of its super registers. 
bool ContainsSuperReg = false; - for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) { - if (LiveRegs.contains(*SReg)) { + for (MCSuperRegIterator SReg(*I, TRI); SReg.isValid(); ++SReg) { + if (contains(*SReg)) { ContainsSuperReg = true; break; } } if (ContainsSuperReg) - continue; - MBB.addLiveIn(Reg); + I = LiveRegs.erase(I); + else + ++I; } } Index: llvm/test/CodeGen/PowerPC/if-converter-split-liveins.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/if-converter-split-liveins.ll @@ -0,0 +1,130 @@ +; RUN: llc -stop-after=if-converter < %s | FileCheck %s + +; CHECK: callee-saved-register: '[[CSR:%[a-z][0-9]+]]' +; CHECK: bb.2.if.then: +; CHECK-NEXT: [[CSR]] + +target triple = "powerpc64le--linux-gnu" + +; Function Attrs: norecurse nounwind readonly +define i64 @adler32_z(i64 %adler, i8* readonly %buf, i64 %len) local_unnamed_addr #0 { +entry: + %shr = lshr i64 %adler, 16 + %and = and i64 %shr, 65535 + %and1 = and i64 %adler, 65535 + br i1 undef, label %if.then, label %if.end15 + +if.then: ; preds = %entry + %add5 = add nsw i64 %and1, %and + %sub9 = add nsw i64 %add5, 281474976645135 + %shl = shl i64 %add5, 16 + %or = or i64 %shl, %and1 + br label %cleanup + +if.end15: ; preds = %entry + br i1 undef, label %while.cond.preheader, label %while.cond30.preheader + +while.cond30.preheader: ; preds = %if.end15 + br i1 undef, label %while.body33.preheader, label %while.body109.preheader + +while.body33.preheader: ; preds = %while.cond30.preheader + br label %while.body33 + +while.cond.preheader: ; preds = %if.end15 + %sub25 = add i64 %and1, -65521 + %rem = urem i64 %and, 65521 + %shl27 = shl nuw nsw i64 %rem, 16 + %or28 = or i64 %shl27, %and1 + br label %cleanup + +while.body33: ; preds = %do.end, %while.body33.preheader + %indvar = phi i64 [ %indvar.next, %do.end ], [ 0, %while.body33.preheader ] + %sum2.2385 = phi i64 [ %rem102, %do.end ], [ %and, %while.body33.preheader ] + %len.addr.1384 = 
phi i64 [ %sub34, %do.end ], [ %len, %while.body33.preheader ] + %buf.addr.1383 = phi i8* [ %scevgep390, %do.end ], [ %buf, %while.body33.preheader ] + %adler.addr.3382 = phi i64 [ %rem101, %do.end ], [ %and1, %while.body33.preheader ] + %0 = mul i64 %indvar, 5552 + %1 = add i64 %0, -13 + %scevgep2 = getelementptr i8, i8* %buf, i64 %1 + %sub34 = add i64 %len.addr.1384, -5552 + call void @llvm.ppc.mtctr.i64(i64 347) + br label %do.body + +do.body: ; preds = %do.body, %while.body33 + %adler.addr.4 = phi i64 [ %adler.addr.3382, %while.body33 ], [ %add49, %do.body ] + %sum2.3 = phi i64 [ %sum2.2385, %while.body33 ], [ %add98, %do.body ] + %tmp15.phi = phi i8* [ %scevgep2, %while.body33 ], [ %tmp15.inc, %do.body ] + %tmp15.inc = getelementptr i8, i8* %tmp15.phi, i64 16 + %add38 = add i64 %adler.addr.4, %sum2.3 + %add42 = add i64 %add38, %adler.addr.4 + %add46 = add i64 %add42, %adler.addr.4 + %tmp15 = load i8, i8* %tmp15.inc, align 1, !tbaa !1 + %conv48 = zext i8 %tmp15 to i64 + %add49 = add i64 %adler.addr.4, %conv48 + %add50 = add i64 %add46, %add49 + %add54 = add i64 %add50, %add49 + %add58 = add i64 %add54, %add49 + %add62 = add i64 %add58, %add49 + %add66 = add i64 %add62, %add49 + %add70 = add i64 %add66, %add49 + %add74 = add i64 %add70, %add49 + %add78 = add i64 %add74, %add49 + %add82 = add i64 %add78, %add49 + %add86 = add i64 %add82, %add49 + %add90 = add i64 %add86, %add49 + %add94 = add i64 %add90, %add49 + %add98 = add i64 %add94, %add49 + %2 = call i1 @llvm.ppc.is.decremented.ctr.nonzero() + br i1 %2, label %do.body, label %do.end + +do.end: ; preds = %do.body + %scevgep390 = getelementptr i8, i8* %buf.addr.1383, i64 5552 + %rem101 = urem i64 %add49, 65521 + %rem102 = urem i64 %add98, 65521 + %cmp31 = icmp ugt i64 %sub34, 5551 + %indvar.next = add i64 %indvar, 1 + br i1 %cmp31, label %while.body33, label %while.end103 + +while.end103: ; preds = %do.end + br i1 undef, label %if.end188, label %while.body109.preheader + +while.body109.preheader: ; preds = 
%while.end103, %while.cond30.preheader + %buf.addr.1.lcssa394400 = phi i8* [ %buf, %while.cond30.preheader ], [ %scevgep390, %while.end103 ] + %arrayidx151 = getelementptr inbounds i8, i8* %buf.addr.1.lcssa394400, i64 10 + %tmp45 = load i8, i8* %arrayidx151, align 1, !tbaa !1 + %conv152 = zext i8 %tmp45 to i64 + br label %while.body109 + +while.body109: ; preds = %while.body109, %while.body109.preheader + %adler.addr.5373 = phi i64 [ %add153, %while.body109 ], [ undef, %while.body109.preheader ] + %add153 = add i64 %adler.addr.5373, %conv152 + br label %while.body109 + +if.end188: ; preds = %while.end103 + %shl189 = shl nuw nsw i64 %rem102, 16 + %or190 = or i64 %shl189, %rem101 + br label %cleanup + +cleanup: ; preds = %if.end188, %while.cond.preheader, %if.then + %retval.0 = phi i64 [ %or, %if.then ], [ %or28, %while.cond.preheader ], [ %or190, %if.end188 ] + ret i64 %retval.0 +} + +; Function Attrs: nounwind +declare void @llvm.ppc.mtctr.i64(i64) #1 + +; Function Attrs: nounwind +declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #1 + +; Function Attrs: nounwind +declare void @llvm.stackprotector(i8*, i8**) #1 + +attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"}