Index: lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.h
+++ lib/Target/PowerPC/PPCInstrInfo.h
@@ -260,6 +260,13 @@
   bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                       unsigned &SrcReg2, int &Mask, int &Value) const override;
 
+  /// Try to eliminate a self-comparison FCMPU (used only as a NaN test) by
+  /// reusing the unordered bit of an existing compare of the same value
+  /// against zero. Returns true if the use of CmpInstr's result was rewritten.
+  bool optimizeFPCmpInstr(MachineInstr &CmpInstr, unsigned Src,
+                          unsigned Src2, int Mask, int Value,
+                          const MachineRegisterInfo *MRI) const;
+
   bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
Index: lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.cpp
+++ lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1537,6 +1537,107 @@
   }
 }
 
+// Try to remove a floating-point self-compare that only serves as a NaN test.
+//
+// We want to remove the first fcmpu in patterns like this
+//   fcmpu 0, 1, 1
+//   fcmpu 1, 1, 0
+//   cror 20, 4, 3
+//   bc 4, 20, .LBB0_2
+// where the first fcmpu only supplies the "unordered" bit. Because 0.0 is not
+// a NaN, comparing x with 0.0 is unordered exactly when comparing x with x is,
+// so that bit can be taken from the second compare instead. In MI terms:
+//   %vreg2 = XXLXORdpz; F8RC:%vreg2
+//   %vreg3 = FCMPUD %vreg0, %vreg2;
+//   %vreg4 = COPY %vreg3:sub_lt;
+//   %vreg5 = FCMPUD %vreg0, %vreg0;   <- CmpInstr
+//   %vreg6 = COPY %vreg5:sub_un;      <- rewritten to read %vreg3:sub_un
+//   %vreg7 = CROR %vreg4, %vreg6;
+//
+// Src and Src2 are the registers compared by CmpInstr; Mask and Value are
+// unused. Returns true if the COPY was rewritten. CmpInstr is not erased here;
+// it is merely left without a non-debug use.
+bool PPCInstrInfo::optimizeFPCmpInstr(MachineInstr &CmpInstr, unsigned Src,
+                                      unsigned Src2, int Mask, int Value,
+                                      const MachineRegisterInfo *MRI) const {
+  // Only a compare of a register with itself is a pure NaN test.
+  if (Src != Src2)
+    return false;
+
+  unsigned CRReg = CmpInstr.getOperand(0).getReg();
+
+  // Ignore debug uses so that the result does not depend on debug info.
+  if (!MRI->hasOneNonDBGUse(CRReg))
+    return false;
+
+  // UnUse is the only non-debug use of CRReg; it has to read the UN bit.
+  MachineOperand &UnUse = *MRI->use_nodbg_begin(CRReg);
+  if (UnUse.getSubReg() != PPC::sub_un)
+    return false;
+
+  MachineInstr *CopyMI = UnUse.getParent();
+  if (CopyMI->getOpcode() != PPC::COPY)
+    return false;
+
+  unsigned CopyReg = CopyMI->getOperand(0).getReg();
+  if (!MRI->hasOneNonDBGUse(CopyReg))
+    return false;
+
+  MachineInstr *CROp = MRI->use_nodbg_begin(CopyReg)->getParent();
+  if (CROp->getOpcode() != PPC::CROR && CROp->getOpcode() != PPC::CRNOR)
+    return false;
+
+  unsigned OtherReg = CROp->getOperand(1).getReg() == CopyReg
+                          ? CROp->getOperand(2).getReg()
+                          : CROp->getOperand(1).getReg();
+
+  // The other CR bit must be copied out of a compare of the same kind.
+  // getUniqueVRegDef() returns null if there is no unique definition.
+  MachineInstr *OtherCopy = MRI->getUniqueVRegDef(OtherReg);
+  if (!OtherCopy || OtherCopy->getOpcode() != PPC::COPY)
+    return false;
+
+  unsigned CopiedReg = OtherCopy->getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(CopiedReg))
+    return false;
+
+  MachineInstr *CmpOp = MRI->getUniqueVRegDef(CopiedReg);
+  if (!CmpOp || CmpOp->getOpcode() != CmpInstr.getOpcode())
+    return false;
+
+  // It must compare the very same value against zero; otherwise its UN bit
+  // says nothing about whether Src is a NaN.
+  if (CmpOp->getOperand(1).getReg() != Src)
+    return false;
+
+  MachineInstr *ZeroFP = MRI->getUniqueVRegDef(CmpOp->getOperand(2).getReg());
+  if (!ZeroFP || (ZeroFP->getOpcode() != PPC::XXLXORdpz &&
+                  ZeroFP->getOpcode() != PPC::XXLXORspz))
+    return false;
+
+  // CmpOp's result must be available at CopyMI. Keep it simple: only handle
+  // the case where CmpOp precedes CopyMI within the same basic block.
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  if (CmpOp->getParent() != MBB)
+    return false;
+  bool CmpPrecedesCopy = false;
+  for (MachineBasicBlock::iterator I(CmpOp), E = MBB->end(); I != E; ++I)
+    if (&*I == CopyMI) {
+      CmpPrecedesCopy = true;
+      break;
+    }
+  if (!CmpPrecedesCopy)
+    return false;
+
+  // Read the UN bit from the compare with zero instead. That extends the live
+  // range of its result up to CopyMI, so kill flags on earlier uses are stale.
+  unsigned NewCRReg = CmpOp->getOperand(0).getReg();
+  MRI->clearKillFlags(NewCRReg);
+  UnUse.setReg(NewCRReg);
+
+  return true;
+}
+
 bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
                                         unsigned SrcReg2, int Mask, int Value,
                                         const MachineRegisterInfo *MRI) const {
@@ -1549,7 +1650,7 @@
   // FP record forms set CR1 based on the execption status bits, not a
   // comparison with zero.
   if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
-    return false;
+    return optimizeFPCmpInstr(CmpInstr, SrcReg, SrcReg2, Mask, Value, MRI);
 
   // The record forms set the condition register based on a signed comparison
   // with zero (so says the ISA manual). This is not as straightforward as it
@@ -1560,7 +1661,7 @@
   // we are restricted to equality for unsigned 64-bit comparisons and for
   // signed 32-bit comparisons the applicability is more restricted.
bool isPPC64 = Subtarget.isPPC64(); - bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; + bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; Index: test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll =================================================================== --- test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll +++ test/CodeGen/PowerPC/fast-isel-fcmp-nan.ll @@ -1,9 +1,21 @@ ; RUN: llc -mtriple powerpc64le-unknown-linux-gnu -fast-isel -O0 < %s | FileCheck %s +; RUN: llc -mtriple powerpc64le-unknown-linux-gnu -O2 < %s | FileCheck --check-prefix=CHECK-O2 %s + +; Tests that need to check for NaN need to look at two CR bits, so we check for a +; pattern that indicates both CR bits are checked. Otherwise we look for a cmp followed +; by a jump. For O2 tests, we need to make sure that only one comparison is done. define i1 @TestULT(double %t0) { ; CHECK-LABEL: TestULT: -; CHECK: xscmpudp +; CHECK: mfocrf [[REG1:[0-9]*]] +; CHECK: mfocrf [[REG2:[0-9]*]] +; CHECK: or {{[0-9]*}}, [[REG1]], [[REG2]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: fcmpu +; CHECK-O2-NEXT: cror +; CHECK-O2-NEXT: bc +; CHECK-O2: blr entry: %t1 = fcmp ult double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -17,9 +29,13 @@ define i1 @TestULE(double %t0) { ; CHECK-LABEL: TestULE: -; CHECK: fcmpu -; CHECK-NEXT: ble +; CHECK: fcmpu [[REG3:[0-9]*]] +; CHECK-NEXT: ble [[REG3]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: xscmpudp +; CHECK-O2-NEXT: bgt +; CHECK-O2: blr entry: %t1 = fcmp ule double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -33,9 +49,13 @@ define i1 @TestUNE(double %t0) { ; CHECK-LABEL: TestUNE: -; CHECK: fcmpu -; CHECK-NEXT: bne +; CHECK: fcmpu [[REG4:[0-9]*]] +; CHECK-NEXT: bne [[REG4]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: xscmpudp +; CHECK-O2-NEXT: beq +; CHECK-O2: blr 
entry: %t1 = fcmp une double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -49,8 +69,16 @@ define i1 @TestUEQ(double %t0) { ; CHECK-LABEL: TestUEQ: -; CHECK: xscmpudp +; CHECK: mfocrf [[REG5:[0-9]*]] +; CHECK: mfocrf [[REG6:[0-9]*]] +; CHECK: or {{[0-9]*}}, [[REG5]], [[REG6]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: fcmpu +; CHECK-O2-NEXT: cror +; CHECK-O2-NEXT: bc +; CHECK-O2: blr + entry: %t1 = fcmp ueq double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -64,8 +92,15 @@ define i1 @TestUGT(double %t0) { ; CHECK-LABEL: TestUGT: -; CHECK: xscmpudp +; CHECK: mfocrf [[REG7:[0-9]*]] +; CHECK: mfocrf [[REG8:[0-9]*]] +; CHECK: or {{[0-9]*}}, [[REG7]], [[REG8]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: fcmpu +; CHECK-O2-NEXT: cror +; CHECK-O2-NEXT: bc +; CHECK-O2: blr entry: %t1 = fcmp ugt double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -79,9 +114,13 @@ define i1 @TestUGE(double %t0) { ; CHECK-LABEL: TestUGE: -; CHECK: fcmpu -; CHECK-NEXT: bge +; CHECK: fcmpu [[REG13:[0-9]*]] +; CHECK-NEXT: bge [[REG13]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: xscmpudp +; CHECK-O2-NEXT: blt +; CHECK-O2: blr entry: %t1 = fcmp uge double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -95,9 +134,13 @@ define i1 @TestOLT(double %t0) { ; CHECK-LABEL: TestOLT: -; CHECK: fcmpu -; CHECK-NEXT: blt +; CHECK: fcmpu [[REG9:[0-9]*]] +; CHECK-NEXT: blt [[REG9]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: xscmpudp +; CHECK-O2-NEXT: bge +; CHECK-O2: blr entry: %t1 = fcmp olt double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -112,7 +155,20 @@ define i1 @TestOLE(double %t0) { ; CHECK-LABEL: TestOLE: ; CHECK: xscmpudp +; CHECK: mfocrf +; CHECK: rlwinm +; CHECK: xori +; CHECK: xscmpudp +; CHECK: mfocrf +; CHECK: clrlwi +; CHECK: xori +; CHECK: and ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: fcmpu +; CHECK-O2-NEXT: crnor +; CHECK-O2-NEXT: bc +; CHECK-O2: blr entry: %t1 = fcmp ole double %t0, 0.000000e+00 br 
i1 %t1, label %good, label %bad @@ -127,7 +183,20 @@ define i1 @TestONE(double %t0) { ; CHECK-LABEL: TestONE: ; CHECK: xscmpudp +; CHECK: mfocrf +; CHECK: rlwinm +; CHECK: xori +; CHECK: xscmpudp +; CHECK: mfocrf +; CHECK: clrlwi +; CHECK: xori +; CHECK: and ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: fcmpu +; CHECK-O2-NEXT: crnor +; CHECK-O2-NEXT: bc +; CHECK-O2: blr entry: %t1 = fcmp one double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -141,9 +210,13 @@ define i1 @TestOEQ(double %t0) { ; CHECK-LABEL: TestOEQ: -; CHECK: fcmpu -; CHECK-NEXT: beq +; CHECK: fcmpu [[REG11:[0-9]*]] +; CHECK-NEXT: beq [[REG11]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: xscmpudp +; CHECK-O2-NEXT: bne +; CHECK-O2: blr entry: %t1 = fcmp oeq double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -157,9 +230,13 @@ define i1 @TestOGT(double %t0) { ; CHECK-LABEL: TestOGT: -; CHECK: fcmpu -; CHECK-NEXT: bgt +; CHECK: fcmpu [[REG12:[0-9]*]] +; CHECK-NEXT: bgt [[REG12]] ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: xscmpudp +; CHECK-O2-NEXT: ble +; CHECK-O2: blr entry: %t1 = fcmp ogt double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad @@ -174,7 +251,20 @@ define i1 @TestOGE(double %t0) { ; CHECK-LABEL: TestOGE: ; CHECK: xscmpudp +; CHECK: mfocrf +; CHECK: rlwinm +; CHECK: xori +; CHECK: xscmpudp +; CHECK: mfocrf +; CHECK: clrlwi +; CHECK: xori +; CHECK: and ; CHECK: blr +; CHECK-O2: xxlxor +; CHECK-O2-NEXT: fcmpu +; CHECK-O2-NEXT: crnor +; CHECK-O2-NEXT: bc +; CHECK-O2: blr entry: %t1 = fcmp oge double %t0, 0.000000e+00 br i1 %t1, label %good, label %bad