[PowerPC] Gate machine-combiner FP reassociation on instruction flags.

Summary of what this patch does, hunk by hunk:

  * PPCInstrInfo.h / PPCInstrInfo.cpp: add an override of
    setSpecialOperandAttr(OldMI1, OldMI2, NewMI1, NewMI2). It gives both new
    instructions the bitwise AND of the two old instructions' flags and then
    clears NoSWrap, NoUWrap and IsExact on each of them.
  * PPCInstrInfo.cpp, isAssociativeAndCommutative: the listed FP opcodes now
    return true only when the instruction carries both FmReassoc and FmNsz,
    instead of returning true unconditionally.
  * PPCInstrInfo.cpp, getMachineCombinerPatterns: drop the early return that
    was keyed on the function-wide Options.UnsafeFPMath setting. The
    CodeGenOpt::Aggressive check is kept.
  * machine-combiner.ll: remove -enable-unsafe-fp-math from both RUN lines and
    put "reassoc nsz" on every fadd in the touched tests instead.

NOTE(review): this text sits before the first "diff --git" header, where patch
tools skip it. Leading whitespace and column alignment inside the hunks were
reconstructed following LLVM formatting; confirm the context lines against the
tree before applying.

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -248,6 +248,10 @@
 
   bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
 
+  void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+                             MachineInstr &NewMI1,
+                             MachineInstr &NewMI2) const override;
+
   bool isCoalescableExtInstr(const MachineInstr &MI,
                              Register &SrcReg, Register &DstReg,
                              unsigned &SubIdx) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -225,6 +225,26 @@
   return Latency;
 }
 
+/// This is an architecture-specific helper function of reassociateOps.
+/// Set special operand attributes for new instructions after reassociation.
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+                                         MachineInstr &OldMI2,
+                                         MachineInstr &NewMI1,
+                                         MachineInstr &NewMI2) const {
+  // Propagate FP flags from the original instructions.
+  // But clear poison-generating flags because those may not be valid now.
+  uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+  NewMI1.setFlags(IntersectedFlags);
+  NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+  NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+  NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+  NewMI2.setFlags(IntersectedFlags);
+  NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+  NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+  NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+}
+
 // This function does not list all associative and commutative operations, but
 // only those worth feeding through the machine combiner in an attempt to
 // reduce the critical path. Mostly, this means floating-point operations,
@@ -258,7 +278,8 @@
   case PPC::QVFMUL:
   case PPC::QVFMULS:
   case PPC::QVFMULSs:
-    return true;
+    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
   default:
     return false;
   }
@@ -272,10 +293,6 @@
   if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
     return false;
 
-  // FP reassociation is only legal when we don't need strict IEEE semantics.
-  if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
-    return false;
-
   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
--- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
-; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
+; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
+; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -14,9 +14,9 @@
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %t1, %x3
   ret float %t2
 }
 
@@ -28,9 +28,9 @@
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %t1, %x3
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %t1, %x3
   ret float %t2
 }
 
@@ -42,9 +42,9 @@
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %x3, %t1
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }
 
@@ -56,9 +56,9 @@
 ; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %x3, %t1
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %x2, %t0
+  %t2 = fadd reassoc nsz float %x3, %t1
   ret float %t2
 }
 
@@ -77,13 +77,13 @@
 ; CHECK:       fadds 1, [[REG2]], 8
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
-  %t3 = fadd float %t2, %x4
-  %t4 = fadd float %t3, %x5
-  %t5 = fadd float %t4, %x6
-  %t6 = fadd float %t5, %x7
+  %t0 = fadd reassoc nsz float %x0, %x1
+  %t1 = fadd reassoc nsz float %t0, %x2
+  %t2 = fadd reassoc nsz float %t1, %x3
+  %t3 = fadd reassoc nsz float %t2, %x4
+  %t4 = fadd reassoc nsz float %t3, %x5
+  %t5 = fadd reassoc nsz float %t4, %x6
+  %t6 = fadd reassoc nsz float %t5, %x7
   ret float %t6
 }
 
@@ -100,9 +100,9 @@
 ; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %t0, %x2
-  %t2 = fadd <4 x float> %t1, %x3
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
   ret <4 x float> %t2
 }
 
@@ -117,9 +117,9 @@
 ; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %x2, %t0
-  %t2 = fadd <4 x float> %t1, %x3
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
   ret <4 x float> %t2
 }
 
@@ -134,9 +134,9 @@
 ; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %t0, %x2
-  %t2 = fadd <4 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }
 
@@ -151,9 +151,9 @@
 ; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
 
-  %t0 = fadd <4 x float> %x0, %x1
-  %t1 = fadd <4 x float> %x2, %t0
-  %t2 = fadd <4 x float> %x3, %t1
+  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
   ret <4 x float> %t2
 }
 