Index: lib/CodeGen/TwoAddressInstructionPass.cpp
===================================================================
--- lib/CodeGen/TwoAddressInstructionPass.cpp
+++ lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -102,6 +102,8 @@
   bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
                             MachineBasicBlock::iterator OldPos);
 
+  MachineInstr *getSingleDefUse(unsigned Reg, bool def);
+
   bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
 
   bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -309,6 +311,26 @@
   return true;
 }
 
+/// getSingleDefUse - Return the only instruction in MBB that defines Reg (when
+/// def is true) or uses Reg (when def is false). Debug values and
+/// instructions outside MBB are ignored. Returns nullptr when no instruction,
+/// or more than one distinct instruction, qualifies.
+MachineInstr *TwoAddressInstructionPass::getSingleDefUse(unsigned Reg,
+                                                         bool def) {
+  MachineInstr *Single = nullptr;
+  for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB || MI->isDebugValue() || MO.isDef() != def)
+      continue;
+    // Bail out on a second distinct instruction. Merely clearing the result
+    // would let a later operand be mistaken for the single def/use.
+    if (Single && Single != MI)
+      return nullptr;
+    Single = MI;
+  }
+  return Single;
+}
+
 /// noUseAfterLastDef - Return true if there are no intervening uses between the
 /// last instruction in the MBB that defines the specified register and the
 /// two-address instruction which is being processed. It also returns the last
@@ -574,6 +596,25 @@
   if (!noUseAfterLastDef(regB, Dist, LastDefB))
     return true;
 
+  // Look for a situation like this:
+  //   %reg101 = MOV %reg100
+  //   %reg102 = ...
+  //   %reg103 = ADD %reg102, %reg101
+  //   %reg100 = MOV %reg103
+  // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+  MachineInstr *defB = getSingleDefUse(regB, true);
+  MachineInstr *defC = getSingleDefUse(regC, true);
+  MachineInstr *useA = getSingleDefUse(regA, false);
+  if (useA && defC &&
+      useA->isCopy() && defC->isCopy() &&
+      useA->getOperand(0).getReg() == defC->getOperand(1).getReg())
+    return true;
+  // If regB is the one copied from useA's destination, do not commute.
+  if (useA && defB &&
+      useA->isCopy() && defB->isCopy() &&
+      useA->getOperand(0).getReg() == defB->getOperand(1).getReg())
+    return false;
+
   // Since there are no intervening uses for both registers, then commute
   // if the def of regC is closer. Its live interval is shorter.
   return LastDefB && LastDefC && LastDefC > LastDefB;
Index: test/CodeGen/X86/twoaddr-coalesce-3.ll
===================================================================
--- test/CodeGen/X86/twoaddr-coalesce-3.ll
+++ test/CodeGen/X86/twoaddr-coalesce-3.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86 | grep mov | count 4
+; The RUN line passes only when exactly 4 lines of llc output contain "mov".
+@M = common global i32 0, align 4
+@total = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @foo() #0 {
+entry:
+  %0 = load i32* @M, align 4, !tbaa !1
+  %cmp3 = icmp sgt i32 %0, 0
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %total.promoted = load i32* @total, align 4, !tbaa !1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
+  %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %div = sdiv i32 %i.04, 2
+  %add = add nsw i32 %div, %add5
+  %inc = add nuw nsw i32 %i.04, 1
+  %cmp = icmp slt i32 %inc, %0
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  store i32 %add, i32* @total, align 4, !tbaa !1
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 230041)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}