Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -154,6 +154,9 @@ /// This pass adds dead/undef flags after analyzing subregister lanes. extern char &DetectDeadLanesID; + /// This pass performs post-RA machine sinking of COPY instructions. + extern char &PostRAMachineSinkingID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -953,6 +953,11 @@ /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + /// Remember what registers the specified instruction uses and modifies. + virtual void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, + BitVector &UsedRegs, + const TargetRegisterInfo *TRI) const; + protected: /// Target-dependent implementation for foldMemoryOperand. 
/// Target-independent code in foldMemoryOperand will Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -101,6 +101,7 @@ void initializeCostModelAnalysisPass(PassRegistry&); void initializeEntryExitInstrumenterPass(PassRegistry&); void initializePostInlineEntryExitInstrumenterPass(PassRegistry&); +void initializePostRAMachineSinkingPass(PassRegistry&); void initializeCrossDSOCFIPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -73,6 +73,7 @@ initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); initializePostRAHazardRecognizerPass(Registry); + initializePostRAMachineSinkingPass(Registry); initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); Index: lib/CodeGen/MachineSink.cpp =================================================================== --- lib/CodeGen/MachineSink.cpp +++ lib/CodeGen/MachineSink.cpp @@ -77,6 +77,7 @@ STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); namespace { @@ -902,3 +903,191 @@ return true; } + +//===----------------------------------------------------------------------===// +// This pass is not intended to be a replacement or a complete alternative +// for the pre-ra machine sink pass. It is only designed to sink COPY +// instructions which should be handled after RA. 
+// +// This pass sinks COPY instructions into a successor block, if the COPY is not +// used in the current block and the COPY is live-in to a single successor +// (i.e., doesn't require the COPY to be duplicated). This avoids executing the +// copy on paths where its result isn't needed. This also exposes +// additional opportunities for dead copy elimination and shrink wrapping. +// +// These copies were either not handled by or are inserted after the MachineSink +// pass. As an example of the former case, the MachineSink pass cannot sink +// COPY instructions with allocatable source registers; for AArch64 these types +// of copy instructions are frequently used to move function parameters (PhysReg) +// into virtual registers in the entry block. +// +// For the machine IR below, this pass will sink %w19 in the entry into its +// successor (%bb.1) because %w19 is only live-in in %bb.1. +// %bb.0: +// %wzr = SUBSWri %w1, 1 +// %w19 = COPY %w0 +// Bcc 11, %bb.2 +// %bb.1: +// Live Ins: %w19 +// BL @fun +// %w0 = ADDWrr %w0, %w19 +// RET %w0 +// %bb.2: +// %w0 = COPY %wzr +// RET %w0 +// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be +// able to see %bb.0 as a candidate. +//===----------------------------------------------------------------------===// +namespace { + +class PostRAMachineSinking : public MachineFunctionPass { +public: + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + PostRAMachineSinking() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return "PostRA Machine Sink"; } + +private: + /// Track which registers have been modified and used. + BitVector ModifiedRegs, UsedRegs; + + /// Sink Copy instructions unused in the same block close to their uses in + /// successors. 
+ bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, + const TargetRegisterInfo *TRI, const TargetInstrInfo *TII); +}; +} // namespace + +char PostRAMachineSinking::ID = 0; +char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID; + +INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink", + "PostRA Machine Sink", false, false) + +static MachineBasicBlock * +getSingleLiveInSuccBB(MachineBasicBlock &CurBB, + ArrayRef SinkableBBs, + unsigned Reg, const TargetRegisterInfo *TRI) { + SmallSet AliasedRegs; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + AliasedRegs.insert(*AI); + + // Try to find a single sinkable successor in which Reg is live-in. + MachineBasicBlock *BB = nullptr; + for (auto *SI : SinkableBBs) { + if (SI->isLiveIn(Reg)) { + // If BB is set here, Reg is live-in to at least two sinkable successors, + // so quit. + if (BB) + return nullptr; + BB = SI; + } + } + // Reg is not live-in to any sinkable successors. + if (!BB) + return nullptr; + + // Check if any register aliased with Reg is live-in in other successors. + for (auto *SI : CurBB.successors()) { + if (SI == BB) + continue; + for (const auto LI : SI->liveins()) + if (AliasedRegs.count(LI.PhysReg)) + return nullptr; + } + return BB; +} + +bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, + MachineFunction &MF, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII) { + SmallVector SinkableBBs; + // FIXME: For now, we sink only to a successor which has a single predecessor + // so that we can directly sink COPY instructions to the successor without + // adding any new block or branch instruction. + for (MachineBasicBlock *SI : CurBB.successors()) + if (!SI->livein_empty() && SI->pred_size() == 1) + SinkableBBs.push_back(SI); + + if (SinkableBBs.empty()) + return false; + + bool Changed = false; + + // Track which registers have been modified and used between the end of the + // block and the current instruction. 
+ ModifiedRegs.reset(); + UsedRegs.reset(); + + for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) { + MachineInstr *MI = &*I; + ++I; + + // Stop at a call, but still report any copies already sunk below it. + if (MI->isCall()) + return Changed; + + if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) { + TII->trackRegDefsUses(*MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + // Don't sink the COPY if it would violate a register dependency. + if (ModifiedRegs[DefReg] || ModifiedRegs[SrcReg] || UsedRegs[DefReg]) { + TII->trackRegDefsUses(*MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + + MachineBasicBlock *SuccBB = + getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI); + // Don't sink if we cannot find a single sinkable successor in which Reg + // is live-in. + if (!SuccBB) { + TII->trackRegDefsUses(*MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && + "Unexpected predecessor"); + + // Clear the kill flag if SrcReg is killed between MI and the end of the + // block. 
+ if (UsedRegs[SrcReg]) { + MachineBasicBlock::iterator NI = std::next(MI->getIterator()); + for (MachineInstr &UI : make_range(NI, CurBB.end())) { + if (UI.killsRegister(SrcReg, TRI)) { + UI.clearRegisterKills(SrcReg, TRI); + MI->getOperand(1).setIsKill(true); + break; + } + } + } + + MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); + SuccBB->splice(InsertPos, &CurBB, MI); + SuccBB->removeLiveIn(DefReg); + if (!SuccBB->isLiveIn(SrcReg)) + SuccBB->addLiveIn(SrcReg); + + Changed = true; + ++NumPostRACopySink; + } + return Changed; +} + +bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + + for (auto &BB : MF) + Changed |= tryToSinkCopy(BB, MF, TRI, TII); + + return Changed; +} + Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -868,6 +868,29 @@ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); } +void TargetInstrInfo::trackRegDefsUses(const MachineInstr &MI, + BitVector &ModifiedRegs, + BitVector &UsedRegs, + const TargetRegisterInfo *TRI) const { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) + ModifiedRegs.setBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ModifiedRegs.set(*AI); + } else { + assert(MO.isUse() && "Reg operand not a def and not a use"); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UsedRegs.set(*AI); + } + } +} + bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( const MachineInstr &MI, AliasAnalysis 
*AA) const { const MachineFunction &MF = *MI.getMF(); Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -80,6 +80,9 @@ cl::desc("Disable Machine LICM")); static cl::opt DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); +static cl::opt DisablePostRAMachineSink("disable-postra-machine-sink", + cl::Hidden, + cl::desc("Disable PostRA Machine Sinking")); static cl::opt DisableLSR("disable-lsr", cl::Hidden, cl::desc("Disable Loop Strength Reduction Pass")); static cl::opt DisableConstantHoisting("disable-constant-hoisting", @@ -253,6 +256,9 @@ if (StandardID == &MachineSinkingID) return applyDisable(TargetID, DisableMachineSink); + if (StandardID == &PostRAMachineSinkingID) + return applyDisable(TargetID, DisablePostRAMachineSink); + if (StandardID == &MachineCopyPropagationID) return applyDisable(TargetID, DisableCopyProp); @@ -841,8 +847,10 @@ addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + addPass(&PostRAMachineSinkingID); addPass(&ShrinkWrapID); + } // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only // do so if it hasn't been disabled, substituted, or overridden. Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -336,6 +336,12 @@ SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; + + /// Remember what registers the specified instruction uses and modifies. + void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, + BitVector &UsedRegs, + const TargetRegisterInfo *TRI) const override; + /// AArch64 supports MachineCombiner. 
bool useMachineCombiner() const override; Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4432,6 +4432,33 @@ DelInstrs.push_back(&Root); } +/// Remember what registers the specified instruction uses and modifies. +void AArch64InstrInfo::trackRegDefsUses(const MachineInstr &MI, + BitVector &ModifiedRegs, + BitVector &UsedRegs, + const TargetRegisterInfo *TRI) const { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) + ModifiedRegs.setBitsNotInMask(MO.getRegMask()); + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + // WZR/XZR are not modified even when used as a destination register. + if (Reg != AArch64::WZR && Reg != AArch64::XZR) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ModifiedRegs.set(*AI); + } else { + assert(MO.isUse() && "Reg operand not a def and not a use?!?"); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UsedRegs.set(*AI); + } + } +} + /// \brief Replace csincr-branch sequence by simple conditional branch /// /// Examples: Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -989,33 +989,6 @@ return NextI; } -/// trackRegDefsUses - Remember what registers the specified instruction uses -/// and modifies. 
-static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs, - BitVector &UsedRegs, - const TargetRegisterInfo *TRI) { - for (const MachineOperand &MO : MI.operands()) { - if (MO.isRegMask()) - ModifiedRegs.setBitsNotInMask(MO.getRegMask()); - - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (MO.isDef()) { - // WZR/XZR are not modified even when used as a destination register. - if (Reg != AArch64::WZR && Reg != AArch64::XZR) - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - ModifiedRegs.set(*AI); - } else { - assert(MO.isUse() && "Reg operand not a def and not a use?!?"); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedRegs.set(*AI); - } - } -} - static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { // Convert the byte-offset used by unscaled into an "element" offset used // by the scaled pair load/store instructions. @@ -1104,7 +1077,7 @@ return false; // Update modified / uses register lists. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. @@ -1224,7 +1197,7 @@ // If the unscaled offset isn't a multiple of the MemSize, we can't // pair the operations together: bail and keep looking. if (MIOffset % MemSize) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } @@ -1244,7 +1217,7 @@ // the stored value is the same (i.e., WZR). 
if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) || (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } @@ -1254,7 +1227,7 @@ // immediate offset of merging these instructions is out of range for // a pairwise instruction, bail and keep looking. if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } @@ -1262,7 +1235,7 @@ // can't express the offset of the unscaled input, bail and keep // looking. if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } @@ -1271,7 +1244,7 @@ // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(&MI); continue; } @@ -1308,7 +1281,7 @@ return E; // Update modified / uses register lists. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. @@ -1484,7 +1457,7 @@ return MBBI; // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is used or modified, we have no match, so // return early. @@ -1536,7 +1509,7 @@ return MBBI; // Update the status of what the instruction clobbered and used. 
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + TII->trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); // Otherwise, if the base register is used or modified, we have no match, so // return early. Index: test/CodeGen/AArch64/post-ra-machine-sink.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/post-ra-machine-sink.mir @@ -0,0 +1,365 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s + +--- +# Sink w19 to %bb.1. +# CHECK-LABEL: name: sinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK-NOT: %w19 = COPY killed %w0 +# CHECK-LABEL: bb.1: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 + +name: sinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + Bcc 11, %bb.1, implicit %nzcv + B %bb.2 + + bb.1: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 + + bb.2: + %w0 = COPY %wzr + RET %x0 +... + +--- +# Sink w19 to %bb.2. +# CHECK-LABEL: name: sinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY killed %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +name: sinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %w0 = COPY %wzr + RET %x0 + + bb.2: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 +... + +--- +# Sink w19 and w20 to %bb.1. 
+# CHECK-LABEL: name: sinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY killed %w0 +# CHECK-LABEL: bb.1: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +# CHECK: renamable %w20 = COPY killed %w1 +name: sinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + renamable %w20 = COPY killed %w1 + + bb.1: + liveins: %w19, %w20 + %w0 = COPY %w19 + %w1 = COPY %w20 + RET %x0 +... + + +# Sink w19 to %bb.1 and w20 to %bb.2. +# CHECK-LABEL: name: sinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY killed %w0 +# CHECK-NOT: renamable %w20 = COPY killed %w1 +# CHECK-LABEL: bb.1: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w0, %w1 +# CHECK: renamable %w20 = COPY killed %w1 +name: sinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY killed %w0 + renamable %w20 = COPY killed %w1 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 + + bb.2: + liveins: %w0, %w20 + %w0 = ADDWrr %w0, %w20 + RET %x0 +... + +# Sink w19 to %bb.3 through %bb.2. +# CHECK-LABEL: name: sinkcopy5 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: %w1 = ADDWrr %w1, %w0 +# CHECK-LABEL: bb.3: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY killed %w0 +name: sinkcopy5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + + bb.1: + liveins: %x0 + %w19 = COPY %wzr + RET %x0 + + bb.2: + liveins: %w0, %w1, %w19 + %w1 = ADDWrr %w1, killed %w0 + + bb.3: + liveins: %w1, %w19 + %w0 = ADDWrr %w1, %w19 + RET %x0 +... + +# Sink w20 to %bb.2, then w19, which becomes live-in to %bb.2 only. 
+# CHECK-LABEL: name: sinkcopy6 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY %w0 +# CHECK-NOT: renamable %w20 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w1, %w0 +# CHECK: renamable %w19 = COPY %w0 +# CHECK: renamable %w20 = COPY %w19 +name: sinkcopy6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + renamable %w20 = COPY %w19 + Bcc 11, %bb.2, implicit %nzcv + + bb.1: + %w0 = COPY %wzr + RET %x0 + + bb.2: + liveins: %w1, %w20 + %w0 = ADDWrr killed %w1, %w20 + RET %x0 +... + +--- +# Sink w19 regardless of the def of wzr in bb.0. +# CHECK-LABEL: name: sinkcopy7 +# CHECK-LABEL: bb.0: +# CHECK-NOT: renamable %w19 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: renamable %w19 = COPY %wzr +name: sinkcopy7 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + renamable %w19 = COPY %wzr + %wzr = SUBSWri %w1, 1, 0, implicit-def %nzcv + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %x0 = COPY %xzr + RET %x0 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +--- + +# Don't sink w19 as w0 is defined in bb.0. +# CHECK-LABEL: name: donotsinkcopy1 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +# CHECK: %w0 = LDRWui %sp, 0 +name: donotsinkcopy1 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + %w0 = LDRWui %sp, 0 :: (load 4) + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %x0 = COPY %xzr + RET %x0 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as w19 is used in bb.0. 
+# CHECK-LABEL: name: donotsinkcopy2 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +# CHECK: STRWui %w1, %x19, 0 +name: donotsinkcopy2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + STRWui %w1, %x19, 0 :: (store 4) + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + %x0 = COPY %xzr + RET %x0 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as w19 is used in both %bb.1 and %bb.2. +# CHECK-LABEL: name: donotsinkcopy3 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +name: donotsinkcopy3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w19 + %w0 = COPY %w19 + RET %x0 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + +--- +# Don't sink w19 as %bb.2 has multiple predecessors. +# CHECK-LABEL: name: donotsinkcopy4 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +name: donotsinkcopy4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w0 + %w19 = COPY %w0 + B %bb.2 + + bb.2: + liveins: %w0, %w19 + %w0 = ADDWrr %w0, %w19 + RET %x0 +... + + +# Don't sink w19 after sinking w20. 
+# CHECK-LABEL: name: donotsinkcopy5 +# CHECK-LABEL: bb.0: +# CHECK: renamable %w19 = COPY %w0 +# CHECK-LABEL: bb.2: +# CHECK: liveins: %w0, %w19 +# CHECK: renamable %w20 = COPY %w19 +name: donotsinkcopy5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %w19 = COPY %w0 + renamable %w20 = COPY %w19 + Bcc 11, %bb.2, implicit %nzcv + + bb.1: + liveins: %w19 + %w0 = COPY %w19 + RET %x0 + + bb.2: + liveins: %w0, %w20 + %w0 = ADDWrr killed %w0, %w20 + RET %x0 +... + +--- +# Don't sink x19 as its alias w19 is live-in in %bb.1. +# CHECK-LABEL: name: donotsinkcopy6 +# CHECK-LABEL: bb.0: +name: donotsinkcopy6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %x0, %w1 + %w1 = SUBSWri %w1, 1, 0, implicit-def %nzcv + renamable %x19 = COPY %x0 + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + liveins: %w19 + %w0 = COPY %w19 + RET %x0 + + bb.2: + liveins: %x0, %x19 + %x0 = ADDXrr %x0, %x19 + RET %x0 +... Index: test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/sink-copy-for-shrink-wrap.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; CHECK-LABEL: %bb.0: +; CHECK-NOT: stp +; CHECK-NOT: mov w{{[0-9]+}}, w0 +; CHECK-LABEL: %bb.1: +; CHECK: stp x19 +; CHECK: mov w{{[0-9]+}}, w0 + +define i32 @shrinkwrapme(i32 %paramAcrossCall, i32 %paramNotAcrossCall) { +entry: + %cmp5 = icmp sgt i32 %paramNotAcrossCall, 0 + br i1 %cmp5, label %CallBB, label %Exit +CallBB: + %call = call i32 @fun() + %add = add i32 %call, %paramAcrossCall + ret i32 %add +Exit: + ret i32 0 +} + +declare i32 @fun() Index: test/CodeGen/Hexagon/vect/vect-v4i16.ll =================================================================== --- test/CodeGen/Hexagon/vect/vect-v4i16.ll +++ test/CodeGen/Hexagon/vect/vect-v4i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s +; 
RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr -disable-postra-machine-sink < %s | FileCheck %s ; Check that store is post-incremented. ; CHECK: memuh(r{{[0-9]+}}+#6) Index: test/DebugInfo/X86/dbg-value-transfer-order.ll =================================================================== --- test/DebugInfo/X86/dbg-value-transfer-order.ll +++ test/DebugInfo/X86/dbg-value-transfer-order.ll @@ -24,14 +24,14 @@ ; with the Orders insertion point vector. ; CHECK-LABEL: f: # @f -; CHECK: .LBB0_1: # %while.body +; CHECK: .LBB0_2: # %while.body ; CHECK: movl $32, %ecx ; CHECK: testl {{.*}} -; CHECK: jne .LBB0_3 -; CHECK: # %bb.2: # %if.then +; CHECK: jne .LBB0_4 +; CHECK: # %bb.3: # %if.then ; CHECK: callq if_then ; CHECK: movl %eax, %ecx -; CHECK: .LBB0_3: # %if.end +; CHECK: .LBB0_4: # %if.end ; Check that this DEBUG_VALUE comes before the left shift. ; CHECK: #DEBUG_VALUE: bit_offset <- %ecx ; CHECK: .cv_loc 0 1 8 28 # t.c:8:28