Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -50,6 +50,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/LiveInterval.h" #include <algorithm> #include <cassert> #include <climits> @@ -3178,6 +3179,21 @@ return false; } + /// The target can specify whether a callee-saved register should be used + /// rather than splitting the live range. Default behaviour is yes. + virtual bool useCSRInsteadOfSplit(const LiveInterval &LI) const { + return true; + } + + /// Target specific cost of using a callee-saved register for the first time + /// when the live range of the value spans the passed blocks. A target should + /// only return a value other than zero here if splitting might be preferred + /// to a CSR use (i.e. it would return false from useCSRInsteadOfSplit for the + /// respective live range). + virtual int64_t costOfFirstCSRForBlocks( + const SmallVectorImpl<MachineBasicBlock*> &UseMBBs) const { + return 0; + } /// Lower TLS global address SDNode for target independent emulated TLS model. virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetLowering.h" #include <queue> using namespace llvm; @@ -120,6 +121,7 @@ // Shortcuts to some useful interface. 
const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const TargetLowering *TLI; RegisterClassInfo RCI; // analyses @@ -2331,7 +2333,17 @@ // the cost of splitting is lower than CSRCost. SA->analyze(&VirtReg); unsigned NumCands = 0; - BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + BlockFrequency BestCost; + bool GetCostFromTarget = !TLI->useCSRInsteadOfSplit(VirtReg); + if (GetCostFromTarget) { + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + SmallVector<MachineBasicBlock*, 4> UseMBBs; + for (int i = 0, e = UseBlocks.size(); i < e; ++i) + UseMBBs.push_back(UseBlocks[i].MBB); + BestCost = TLI->costOfFirstCSRForBlocks(UseMBBs); + } else + BestCost = CSRCost; + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, true /*IgnoreCSR*/); if (BestCand == NoCand) @@ -2550,8 +2562,8 @@ // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. 
- if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && - NewVRegs.empty()) { + if ((CSRCost.getFrequency() || !TLI->useCSRInsteadOfSplit(VirtReg)) && + isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) @@ -2704,6 +2716,7 @@ MF = &mf; TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); RCI.runOnMachineFunction(mf); EnableLocalReassign = EnableLocalReassignment || Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -1013,6 +1013,9 @@ SDValue combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + virtual bool useCSRInsteadOfSplit(const LiveInterval &LI) const override; + virtual int64_t costOfFirstCSRForBlocks( + const SmallVectorImpl<MachineBasicBlock*> &UseMBBs) const override; }; namespace PPC { Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -12937,3 +12937,33 @@ return Imm.isPosZero(); } } + +// If the live interval can be spilled, we'd prefer to do so (subject to cost +// function below). +bool PPCTargetLowering::useCSRInsteadOfSplit(const LiveInterval &LI) const { + return !LI.isSpillable(); +} + +static bool hasCall(MachineBasicBlock *MBB) { + for (MachineInstr &MI : *MBB) + if (MI.isCall()) + return true; + return false; +} + +// Return a high cost for the first use of a callee-saved register if the live +// range of the value spans basic blocks in which we'd prefer not to use one. 
+// This will often defer use of a CSR and give shrink-wrapping an opportunity +// to sink/hoist the save/restore from entry/exit blocks respectively. +int64_t PPCTargetLowering::costOfFirstCSRForBlocks( + const SmallVectorImpl<MachineBasicBlock*> &UseMBBs) const { + // Make uses of callee-saved registers expensive if any blocks in the live + // range have no calls. The actual cost may need some adjustment - this is a + // rather arbitrary number that seems to produce good performance improvement. + const int CostOnBlockWithoutCall = 1 << 15; + for (MachineBasicBlock *It : UseMBBs) { + if (!hasCall(It)) + return CostOnBlockWithoutCall; + } + return TargetLowering::costOfFirstCSRForBlocks(UseMBBs); +} Index: test/CodeGen/PowerPC/branch-opt.ll =================================================================== --- test/CodeGen/PowerPC/branch-opt.ll +++ test/CodeGen/PowerPC/branch-opt.ll @@ -11,10 +11,10 @@ ; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy ; of %cond_next48, so there should only be two unconditional branches. -;CHECK: b LBB0_13 -;CHECK: b LBB0_13 -;CHECK-NOT: b LBB0_13 -;CHECK: LBB0_13: ; %cond_next48 +;CHECK: b LBB0_20 +;CHECK: b LBB0_20 +;CHECK-NOT: b LBB0_18 +;CHECK: LBB0_18: ; %cond_next48 define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) { entry: Index: test/CodeGen/PowerPC/ppc-shrink-wrapping.ll =================================================================== --- test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -91,10 +91,11 @@ ; ; Loop body ; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body +; CHECK: mr [[NEWSUM:[0-9]+]], [[SUM]] ; CHECK: bl something ; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-NEXT: cmplwi [[IV]], 0 +; CHECK-DAG: add [[NEWSUM]], 3, [[NEWSUM]] +; CHECK: cmplwi [[IV]], 0 ; CHECK-NEXT: bne 0, .[[LOOP]] ; ; Next BB. 
@@ -159,12 +160,13 @@ ; Loop preheader ; CHECK-DAG: li [[SUM:[0-9]+]], 0 ; CHECK-DAG: li [[IV:[0-9]+]], 10 -; +; ; Loop body ; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body ; CHECK: bl something +; CHECK: mr [[NEWRET:[0-9]+]], 3 ; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-DAG: add [[SUM]], [[NEWRET]], [[SUM]] ; CHECK-NEXT: cmplwi [[IV]], 0 ; CHECK-NEXT: bne 0, .[[LOOP]] ; @@ -301,16 +303,17 @@ ; Loop preheader ; CHECK-DAG: li [[SUM:[0-9]+]], 0 ; CHECK-DAG: li [[IV:[0-9]+]], 10 -; +; ; Loop body ; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body +; CHECK: mr [[NEWSUM:[0-9]+]], [[SUM]] ; CHECK: bl something ; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-DAG: add [[NEWSUM]], 3, [[NEWSUM]] ; CHECK-NEXT: cmplwi [[IV]], 0 -; CHECK-NEXT: bne 0, .[[LOOP]] +; CHECK: bne 0, .[[LOOP]] ; -; Next BB. +; Next BB. ; slwi 3, [[SUM]], 3 ; ; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]] Index: test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll +++ test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll @@ -25,10 +25,10 @@ ; NONINVFUNCDESC-LABEL: @bar ; NONINVFUNCDESC: %for.body ; NONINVFUNCDESC: std 2, 40(1) -; NONINVFUNCDESC-DAG: ld 3, 0(30) -; NONINVFUNCDESC-DAG: ld 11, 16(30) -; NONINVFUNCDESC-DAG: ld 2, 8(30) -; NONINVFUNCDESC: mtctr 3 +; NONINVFUNCDESC-DAG: ld 5, 0(3) +; NONINVFUNCDESC-DAG: ld 11, 16(3) +; NONINVFUNCDESC-DAG: ld 2, 8(3) +; NONINVFUNCDESC: mtctr 5 ; NONINVFUNCDESC: bctrl ; NONINVFUNCDESC-NEXT: ld 2, 40(1) Index: test/CodeGen/PowerPC/tail-dup-break-cfg.ll =================================================================== --- test/CodeGen/PowerPC/tail-dup-break-cfg.ll +++ test/CodeGen/PowerPC/tail-dup-break-cfg.ll @@ -11,16 +11,15 @@ ; exit ;CHECK-LABEL: tail_dup_break_cfg: -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. 
{{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]] ;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit ;CHECK: blr ;CHECK-NEXT: [[BODY1LABEL]] -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: beq 0, [[EXITLABEL]] ;CHECK-NEXT: [[BODY2LABEL:[._0-9A-Za-z]+]]: ;CHECK: b [[EXITLABEL]] @@ -53,12 +52,11 @@ ; The branch weights here hint that we shouldn't tail duplicate in this case. ;CHECK-LABEL: tail_dup_dont_break_cfg: -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %body1 ;CHECK: [[TEST2LABEL]]: # %test2 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %body2 ;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit Index: test/CodeGen/PowerPC/tail-dup-layout.ll =================================================================== --- test/CodeGen/PowerPC/tail-dup-layout.ll +++ test/CodeGen/PowerPC/tail-dup-layout.ll @@ -21,28 +21,27 @@ ; the optional blocks and that the optional blocks are in the correct order. ;CHECK-LABEL: straight_test: ; test1 may have been merged with entry -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: rlwinm. 
{{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 29, 29 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 28, 28 ;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit ;CHECK: blr ;CHECK-NEXT: .[[OPT1LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]] ;CHECK-NEXT: .[[OPT2LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 29, 29 ;CHECK-NEXT: beq 0, .[[TEST4LABEL]] ;CHECK-NEXT: .[[OPT3LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 28, 28 ;CHECK-NEXT: beq 0, .[[EXITLABEL]] ;CHECK-NEXT: .[[OPT4LABEL]]: ;CHECK: b .[[EXITLABEL]] @@ -114,22 +113,21 @@ ; the optional blocks and that the optional blocks are in the correct order. ;CHECK-LABEL: straight_test_50: ; test1 may have been merged with entry -;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 -;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: rlwinm. {{[0-9]+}}, 3, 0, 29, 29 ;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit ;CHECK: blr ;CHECK-NEXT: .[[OPT1LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK: rlwinm. 
{{[0-9]+}}, 3, 0, 30, 30 ;CHECK-NEXT: beq 0, .[[TEST3LABEL]] ;CHECK-NEXT: .[[OPT2LABEL]]: -;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK: rlwinm. {{[0-9]+}}, 3, 0, 29, 29 ;CHECK-NEXT: beq 0, .[[EXITLABEL]] ;CHECK-NEXT: .[[OPT3LABEL]]: ;CHECK: b .[[EXITLABEL]] @@ -182,14 +180,15 @@ ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. ;CHECK-LABEL: loop_test: -;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4 +;CHECK: extsw [[SEXTREG:[0-9]+]], 4 +;CHECK: add [[TAGPTRREG:[0-9]+]], 3, [[SEXTREG]] ;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch ;CHECK: addi ;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check ;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]]) ;CHECK: # %test1 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] +;CHECK: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]