Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -6298,14 +6298,30 @@
   return isHighLatencyDef(DefMI->getOpcode());
 }
 
-static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
-                                          const MachineBasicBlock *MBB) {
-  assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
+static bool hasReassociableOperands(const MachineInstr &Inst,
+                                    const MachineBasicBlock *MBB) {
+  assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
+         "Reassociation needs binary operators");
   const MachineOperand &Op1 = Inst.getOperand(1);
   const MachineOperand &Op2 = Inst.getOperand(2);
   const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 
-  // We need virtual register definitions.
+  // Integer binary math/logic instructions have a third source operand:
+  // the EFLAGS register. That operand must be both defined here and never
+  // used; ie, it must be dead. If the EFLAGS operand is live, then we can
+  // not change anything because rearranging the operands could affect other
+  // instructions that depend on the exact status flags (zero, sign, etc.)
+  // that are set by using these particular operands with this operation.
+  if (Inst.getNumOperands() == 4) {
+    assert(Inst.getOperand(3).isReg() &&
+           Inst.getOperand(3).getReg() == X86::EFLAGS &&
+           "Unexpected operand in reassociable instruction");
+    if (!Inst.getOperand(3).isDead())
+      return false;
+  }
+
+  // We need virtual register definitions for the operands that we will
+  // reassociate.
   MachineInstr *MI1 = nullptr;
   MachineInstr *MI2 = nullptr;
   if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
@@ -6338,7 +6354,7 @@
   // operands in the same basic block as Inst.
   // 3. The previous instruction's result must only be used by Inst.
   if (MI1->getOpcode() == AssocOpcode &&
-      hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
+      hasReassociableOperands(*MI1, MBB) &&
       MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
     return true;
 
@@ -6350,6 +6366,10 @@
 // 2. Other math / logic operations (and, or)
 static bool isAssociativeAndCommutative(const MachineInstr &Inst) {
   switch (Inst.getOpcode()) {
+  case X86::IMUL16rr:
+  case X86::IMUL32rr:
+  case X86::IMUL64rr:
+    return true;
   case X86::ADDSDrr:
   case X86::ADDSSrr:
   case X86::VADDSDrr:
@@ -6375,7 +6395,7 @@
   // operands in the same basic block.
   // 3. The instruction must have a reassociable sibling.
   if (isAssociativeAndCommutative(Inst) &&
-      hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
+      hasReassociableOperands(Inst, Inst.getParent()) &&
       hasReassocSibling(Inst, Commuted))
     return true;
 
Index: test/CodeGen/X86/machine-combiner-int.ll
===================================================================
--- test/CodeGen/X86/machine-combiner-int.ll
+++ test/CodeGen/X86/machine-combiner-int.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 < %s | FileCheck %s
+
+; Verify that integer multiplies are reassociated. The first multiply in
+; each test should be independent of the result of the preceding add (lea).
+
+define i16 @reassociate_muls_i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3) {
+; CHECK-LABEL: reassociate_muls_i16:
+; CHECK:       # BB#0:
+; CHECK-NEXT:  leal (%rdi,%rsi), %eax
+; CHECK-NEXT:  imull %ecx, %edx
+; CHECK-NEXT:  imull %edx, %eax
+; CHECK-NEXT:  retq
+  %t0 = add i16 %x0, %x1
+  %t1 = mul i16 %x2, %t0
+  %t2 = mul i16 %x3, %t1
+  ret i16 %t2
+}
+
+define i32 @reassociate_muls_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_muls_i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:  leal (%rdi,%rsi), %eax
+; CHECK-NEXT:  imull %ecx, %edx
+; CHECK-NEXT:  imull %edx, %eax
+; CHECK-NEXT:  retq
+  %t0 = add i32 %x0, %x1
+  %t1 = mul i32 %x2, %t0
+  %t2 = mul i32 %x3, %t1
+  ret i32 %t2
+}
+
+define i64 @reassociate_muls_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_muls_i64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:  leaq (%rdi,%rsi), %rax
+; CHECK-NEXT:  imulq %rcx, %rdx
+; CHECK-NEXT:  imulq %rdx, %rax
+; CHECK-NEXT:  retq
+  %t0 = add i64 %x0, %x1
+  %t1 = mul i64 %x2, %t0
+  %t2 = mul i64 %x3, %t1
+  ret i64 %t2
+}
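Not part of the patch, but a minimal C++ sketch of the dependence-chain effect the new tests check for; the function and variable names are illustrative only and mirror the IR tests. Reassociating the second multiply so it no longer depends on the add shortens the critical path from three dependent operations to two:

// Sketch only: illustrates the reassociation the machine combiner performs
// for integer multiplies; wrap-around arithmetic keeps both forms equivalent.
#include <cstdint>

// Before: t0 -> t1 -> t2 is a serial chain of three dependent operations.
int64_t muls_serial(int64_t x0, int64_t x1, int64_t x2, int64_t x3) {
  int64_t t0 = x0 + x1; // add (lea)
  int64_t t1 = x2 * t0; // imul, must wait for t0
  int64_t t2 = x3 * t1; // imul, must wait for t1
  return t2;
}

// After: the first multiply is independent of the add, so the add and the
// x2 * x3 multiply can execute in parallel; the critical path is two ops.
int64_t muls_reassociated(int64_t x0, int64_t x1, int64_t x2, int64_t x3) {
  int64_t t0 = x0 + x1; // add (lea)
  int64_t m  = x2 * x3; // imul, independent of t0
  int64_t t2 = m * t0;  // imul, waits for both
  return t2;
}

This also shows why the EFLAGS operand must be dead before reassociating: the transformed code multiplies different operand pairs, so any consumer of the flags produced by the original imul/add sequence would observe different status bits.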