Index: lib/CodeGen/PeepholeOptimizer.cpp
===================================================================
--- lib/CodeGen/PeepholeOptimizer.cpp
+++ lib/CodeGen/PeepholeOptimizer.cpp
@@ -98,6 +98,10 @@
     DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
                       cl::desc("Disable advanced copy optimization"));
 
+static cl::opt<bool> DisableNAPhysCopyOpt(
+    "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
+    cl::desc("Disable non-allocatable physical register copy optimization"));
+
 // Limit the number of PHI instructions to process
 // in PeepholeOptimizer::getNextSource.
 static cl::opt<unsigned> RewritePHILimit(
@@ -111,6 +115,7 @@
 STATISTIC(NumSelects, "Number of selects optimized");
 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
+STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
 
 namespace {
   class ValueTrackerResult;
@@ -162,12 +167,21 @@
                        DenseMap<unsigned, MachineInstr *> &ImmDefMIs);
 
     /// \brief If copy instruction \p MI is a virtual register copy, track it in
-    /// the set \p CopiedFromRegs and \p CopyMIs. If this virtual register was
+    /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
     /// previously seen as a copy, replace the uses of this copy with the
     /// previously seen copy's destination register.
     bool foldRedundantCopy(MachineInstr *MI,
-                           SmallSet<unsigned, 4> &CopiedFromRegs,
-                           DenseMap<unsigned, MachineInstr *> &CopyMIs);
+                           SmallSet<unsigned, 4> &CopySrcRegs,
+                           DenseMap<unsigned, MachineInstr *> &CopyMIs);
+
+    /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
+    /// register copy, track it in the \p NAPhysToVirtMIs map. If this
+    /// non-allocatable physical register was previously copied to a virtual
+    /// register and hasn't been clobbered, the virt->phys copy can be
+    /// deleted.
+    bool foldRedundantNAPhysCopy(
+        MachineInstr *MI,
+        DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
 
     bool isLoadFoldable(MachineInstr *MI,
                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
@@ -1356,9 +1370,9 @@
 //
 // Should replace %vreg2 uses with %vreg1:sub1
 bool PeepholeOptimizer::foldRedundantCopy(
-                       MachineInstr *MI,
-                       SmallSet<unsigned, 4> &CopySrcRegs,
-                       DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+    MachineInstr *MI,
+    SmallSet<unsigned, 4> &CopySrcRegs,
+    DenseMap<unsigned, MachineInstr *> &CopyMIs) {
   assert(MI->isCopy());
 
   unsigned SrcReg = MI->getOperand(1).getReg();
@@ -1400,6 +1414,59 @@
   return true;
 }
 
+bool PeepholeOptimizer::foldRedundantNAPhysCopy(
+    MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
+  assert(MI->isCopy());
+
+  if (DisableNAPhysCopyOpt)
+    return false;
+
+  unsigned DstReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI->getOperand(1).getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+      !MRI->isAllocatable(SrcReg) &&
+      TargetRegisterInfo::isVirtualRegister(DstReg)) {
+    DEBUG(dbgs() << "%vreg = COPY %PHYSREG::: " << *MI << '\n');
+    // %vreg = COPY %PHYSREG
+    // Avoid using a data structure which can track multiple live
+    // non-allocatable phys->virt copies since LLVM doesn't seem to do this.
+    NAPhysToVirtMIs.insert({SrcReg, MI});
+    return false;
+  }
+
+  if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+        TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+        !MRI->isAllocatable(DstReg)))
+    return false;
+
+  DEBUG(dbgs() << "%PHYSREG = COPY %vreg::: " << *MI << '\n');
+  // %PHYSREG = COPY %vreg
+  auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
+  if (PrevCopy == NAPhysToVirtMIs.end()) {
+    // We can't remove the copy: there was an intervening clobber of the
+    // non-allocatable physical register after the copy to virtual.
+    DEBUG(dbgs() << "*** Can't erase " << *MI << '\n');
+    return false;
+  }
+
+  unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+  if (PrevDstReg == SrcReg) {
+    // Remove the virt->phys copy: we saw the virtual register definition, and
+    // the non-allocatable physical register's state hasn't changed since then.
+    DEBUG(dbgs() << "*** Erasing " << *MI << '\n');
+    ++NumNAPhysCopies;
+    return true;
+  }
+
+  // Potential missed optimization opportunity: we saw a different virtual
+  // register get a copy of the non-allocatable physical register, and we only
+  // track one such copy. Avoid getting confused by this new non-allocatable
+  // physical register definition, and remove it from the tracked copies.
+  DEBUG(dbgs() << "*** Missed opportunity " << *MI << '\n');
+  NAPhysToVirtMIs.erase(PrevCopy);
+  return false;
+}
+
 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   if (skipOptnoneFunction(*MF.getFunction()))
     return false;
@@ -1433,6 +1500,13 @@
     DenseMap<unsigned, MachineInstr *> ImmDefMIs;
     SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
 
+    // Track when a non-allocatable physical register is copied to a virtual
+    // register so that useless moves can be removed.
+    //
+    // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
+    // without any intervening re-definition of %PHYSREG.
+    DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
+
     // Set of virtual registers that are copied from.
     SmallSet<unsigned, 4> CopySrcRegs;
     DenseMap<unsigned, MachineInstr *> CopySrcMIs;
@@ -1453,10 +1527,47 @@
       if (MI->isLoadFoldBarrier())
         FoldAsLoadDefCandidates.clear();
 
-      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
-          MI->isKill() || MI->isInlineAsm() ||
-          MI->hasUnmodeledSideEffects())
+      if (MI->isPosition() || MI->isPHI())
+        continue;
+
+      if (!MI->isCopy()) {
+        DEBUG(dbgs() << "Looking at operands for " << *MI << '\n');
+        for (const auto &Op : MI->operands()) {
+          DEBUG(dbgs() << " "
+                       << " reg=" << Op.isReg()
+                       << " def=" << (Op.isReg() ? Op.isDef() : false)
+                       << " imp=" << (Op.isReg() ? Op.isImplicit() : false)
+                       << '\n');
+          // Visit all operands: definitions can be implicit or explicit.
+          if (Op.isReg()) {
+            unsigned Reg = Op.getReg();
+            if (Op.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg) &&
+                !MRI->isAllocatable(Reg)) {
+              const auto &Def = NAPhysToVirtMIs.find(Reg);
+              if (Def != NAPhysToVirtMIs.end()) {
+                // A new definition of the non-allocatable physical register
+                // invalidates previous copies.
+                DEBUG(dbgs() << " ------ Invalidating ^^^\n");
+                NAPhysToVirtMIs.erase(Def);
+              }
+            }
+          }
+        }
+      }
+
+      if (MI->isImplicitDef() || MI->isKill())
+        continue;
+
+      if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
+        // Blow away all non-allocatable physical register knowledge since we
+        // don't know what's correct anymore.
+        //
+        // FIXME: handle explicit asm clobbers.
+        DEBUG(dbgs()
+              << "*** Inline asm or side effects, blowing away phys to virt\n");
+        NAPhysToVirtMIs.clear();
         continue;
+      }
 
       if ((isUncoalescableCopy(*MI) &&
            optimizeUncoalescableCopy(MI, LocalMIs)) ||
@@ -1479,7 +1590,9 @@
         continue;
       }
 
-      if (MI->isCopy() && foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs)) {
+      if (MI->isCopy() &&
+          (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) ||
+           foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) {
         LocalMIs.erase(MI);
         MI->eraseFromParent();
         Changed = true;
Index: test/CodeGen/X86/incdec-and-branch.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/incdec-and-branch.ll
@@ -0,0 +1,54 @@
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
+
+; inc / dec generate flags which can be used to branch. Make sure the flags are
+; used directly, instead of needlessly using lahf.
+
+@L = external global i32
+@M = external global i8
+
+; TODO other sizes?
+
+; CHECK-NOT: seto
+; CHECK-NOT: lahf
+; CHECK-NOT: sahf
+; CHECK-NOT: pushf
+; CHECK-NOT: popf
+
+define i1 @plus_one() {
+entry:
+  %loaded_L = load i32, i32* @L
+  %val = add nsw i32 %loaded_L, 1 ; N.B.
+  store i32 %val, i32* @L
+  %loaded_M = load i8, i8* @M
+  %masked = and i8 %loaded_M, 8
+  %M_is_true = icmp ne i8 %masked, 0
+  %L_is_false = icmp eq i32 %val, 0
+  %cond = and i1 %L_is_false, %M_is_true
+  br i1 %cond, label %exit2, label %exit
+
+exit:
+  ret i1 true
+
+exit2:
+  ret i1 false
+}
+
+define i1 @minus_one() {
+entry:
+  %loaded_L = load i32, i32* @L
+  %val = add nsw i32 %loaded_L, -1 ; N.B.
+  store i32 %val, i32* @L
+  %loaded_M = load i8, i8* @M
+  %masked = and i8 %loaded_M, 8
+  %M_is_true = icmp ne i8 %masked, 0
+  %L_is_false = icmp eq i32 %val, 0
+  %cond = and i1 %L_is_false, %M_is_true
+  br i1 %cond, label %exit2, label %exit
+
+exit:
+  ret i1 true
+
+exit2:
+  ret i1 false
+}
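
For readers less familiar with this pass, the patch reduces to a small state machine: remember the last `%vreg = COPY %PHYSREG` per non-allocatable physical register, drop that entry whenever %PHYSREG is redefined (or drop every entry on inline asm / unmodeled side effects), and erase a later `%PHYSREG = COPY %vreg` only if it copies back from the exact virtual register being tracked. The sketch below is an illustration only, not LLVM code: registers are plain ints (negative models a non-allocatable physical register such as EFLAGS, positive a virtual register), and the names Inst, isNAPhys, and NAPhysToVirt are invented for the example.

#include <cstdio>
#include <unordered_map>
#include <vector>

struct Inst {
  enum Kind { Copy, Clobber } kind;
  int dst, src; // src is unused for Clobber.
};

static bool isNAPhys(int r) { return r < 0; }

int main() {
  std::vector<Inst> block = {
      {Inst::Copy, 1, -1},    // %vreg1 = COPY %PHYSREG: start tracking.
      {Inst::Copy, -1, 1},    // %PHYSREG = COPY %vreg1: redundant, erase.
      {Inst::Clobber, -1, 0}, // %PHYSREG redefined: invalidate tracking.
      {Inst::Copy, -1, 1},    // %PHYSREG = COPY %vreg1: clobbered, keep.
  };

  // Maps a non-allocatable physical register to the last instruction that
  // copied it into a virtual register, like NAPhysToVirtMIs in the patch.
  std::unordered_map<int, const Inst *> NAPhysToVirt;

  for (const Inst &I : block) {
    if (I.kind == Inst::Clobber) {
      // A new definition invalidates the tracked copy.
      NAPhysToVirt.erase(I.dst);
      continue;
    }
    if (isNAPhys(I.src) && !isNAPhys(I.dst)) {
      // %vreg = COPY %PHYSREG: track only the first live copy, as the
      // patch's insert() does.
      NAPhysToVirt.insert({I.src, &I});
      continue;
    }
    if (isNAPhys(I.dst) && !isNAPhys(I.src)) {
      // %PHYSREG = COPY %vreg is redundant iff the tracked copy wrote the
      // same virtual register and no clobber intervened.
      auto It = NAPhysToVirt.find(I.dst);
      bool Redundant = It != NAPhysToVirt.end() && It->second->dst == I.src;
      std::printf("COPY r%d <- r%d: %s\n", I.dst, I.src,
                  Redundant ? "erase" : "keep");
    }
  }
  return 0;
}

Running this prints "erase" for the second copy and "keep" for the last one, which is the behavior the incdec-and-branch.ll test above checks for at the assembly level: the flags produced by inc/dec feed the branch directly, with no lahf/sahf or pushf/popf round-trip surviving.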