diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -332,7 +332,8 @@
   bool isRegElgibleForForwarding(const MachineOperand &RegMO,
                                  const MachineInstr &DefMI,
                                  const MachineInstr &MI, bool KillDefMI,
-                                 bool &IsFwdFeederRegKilled) const;
+                                 bool &IsFwdFeederRegKilled,
+                                 bool &SeenIntermediateUse) const;
   unsigned getSpillTarget() const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
@@ -718,6 +719,8 @@
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;
 
+  bool optimizeCmpPostRA(MachineInstr &MI) const;
+
   /// Get the base operand and byte offset of an instruction that reads/writes
   /// memory.
   bool getMemOperandsWithOffsetWidth(
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2767,6 +2767,87 @@
   return true;
 }
 
+/// Post-RA peephole: try to make the compare \p CmpMI redundant by rewriting
+/// the instruction that getDefMIPostRA finds for its source register into the
+/// opcode given by PPC::getRecordFormOpcode, with CR0 as an implicit def.
+///
+/// \returns true if that instruction was rewritten. \p CmpMI is not
+/// erased here; that is left to the caller.
+bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
+  // This transformation is only attempted once the function has left SSA form.
+  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
+  if (MRI->isSSA())
+    return false;
+
+  Register SrcReg, SrcReg2;
+  int64_t CmpMask, CmpValue;
+  // Bail out unless analyzeCompare succeeds with no SrcReg2 and a zero
+  // CmpValue, i.e. presumably a compare of SrcReg against immediate zero
+  // (CMPWI, CMPLWI, CMPDI or CMPLDI -- TODO confirm against
+  // analyzeCompare).
+  // NOTE(review): record forms presumably set CR0 from a signed compare of
+  // the full-width result with zero. Confirm this is still equivalent when
+  // CmpMI is an unsigned compare (CMPLWI/CMPLDI) or a 32-bit compare on a
+  // 64-bit target; the opcode of CmpMI is not checked here.
+  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue) || SrcReg2 ||
+      CmpValue)
+    return false;
+
+  // CmpMI can't be deleted if it has implicit def.
+  if (CmpMI.hasImplicitDef())
+    return false;
+
+  // SrcMI is presumably the closest preceding def of SrcReg. OtherUse_SrcReg
+  // is an out-parameter that is only consulted for the kill/dead fixup below.
+  bool OtherUse_SrcReg = false;
+  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, OtherUse_SrcReg);
+  if (!SrcMI)
+    return false;
+
+  // Out-parameter required by isRegElgibleForForwarding; not used afterwards.
+  bool IsFwdFeederRegKilled = false;
+  MachineOperand RegMO = CmpMI.getOperand(0);
+  Register CRReg = RegMO.getReg();
+  // Only a compare whose operand 0 is CR0 is handled.
+  if (CRReg != PPC::CR0)
+    return false;
+
+  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
+  bool OtherUse_CRReg = false;
+  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false,
+                                 IsFwdFeederRegKilled, OtherUse_CRReg) ||
+      SrcMI->definesRegister(CRReg) || OtherUse_CRReg)
+    return false;
+
+  // A result of -1 means there is nothing to rewrite SrcMI into.
+  int OldOpc = SrcMI->getOpcode();
+  int NewOpC = PPC::getRecordFormOpcode(OldOpc);
+  if (NewOpC == -1)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Replace Instr: ");
+  LLVM_DEBUG(SrcMI->dump());
+
+  // Switch SrcMI to the new opcode and append CRReg as an implicit def.
+  const MCInstrDesc &NewDesc = get(NewOpC);
+  SrcMI->setDesc(NewDesc);
+  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
+      .addReg(CRReg, RegState::ImplicitDefine);
+
+  // Fix up killed/dead flag for SrcReg after transformation.
+  if (OtherUse_SrcReg || CmpMI.getOperand(1).isKill())
+    fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);
+
+  assert(SrcMI->definesRegister(PPC::CR0) &&
+         "Record-form instruction does not define cr0?");
+
+  LLVM_DEBUG(dbgs() << "with: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+  LLVM_DEBUG(CmpMI.dump());
+  return true;
+}
+
 bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl &BaseOps,
     int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -4468,7 +4549,7 @@
 bool PPCInstrInfo::isRegElgibleForForwarding(
     const MachineOperand &RegMO, const MachineInstr &DefMI,
     const MachineInstr &MI, bool KillDefMI,
-    bool &IsFwdFeederRegKilled) const {
+    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
   // x = addi y, imm
   // ...
   // z = lfdx 0, x -> z = lfd imm(y)
@@ -4490,6 +4571,8 @@
       return false;
     else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
       IsFwdFeederRegKilled = true;
+    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+      SeenIntermediateUse = true;
     // Made it to DefMI without encountering a clobber.
     if ((&*It) == &DefMI)
       break;
@@ -4929,9 +5012,10 @@
     return false;
 
   bool IsFwdFeederRegKilled = false;
+  bool SeenIntermediateUse = false;
   // Check if the RegMO can be forwarded to MI.
if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI, - IsFwdFeederRegKilled)) + IsFwdFeederRegKilled, SeenIntermediateUse)) return false; // Get killed info in case fixup needed after transformation. diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -38,6 +38,8 @@ "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); +STATISTIC(NumCmpsInPreEmit, + "Number of compares eliminated in pre-emit peephole"); static cl::opt EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), @@ -508,6 +510,13 @@ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } + if (TII->optimizeCmpPostRA(MI)) { + Changed = true; + NumCmpsInPreEmit++; + LLVM_DEBUG(dbgs() << "Optimize compare by using record form: "); + LLVM_DEBUG(MI.dump()); + InstrsToErase.push_back(&MI); + } } // Eliminate conditional branch based on a constant CR bit by diff --git a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll --- a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll +++ b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll @@ -55,8 +55,7 @@ ; CHECK-NEXT: # %bb.3: # %invcont23 ; CHECK-NEXT: # ; CHECK-NEXT: ld 3, 128(31) -; CHECK-NEXT: sub 30, 30, 3 -; CHECK-NEXT: cmpldi 30, 0 +; CHECK-NEXT: sub. 
30, 30, 3 ; CHECK-NEXT: bne 0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %cleanup ; CHECK-NEXT: ld 30, 160(31) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll --- a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll +++ b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll @@ -717,9 +717,11 @@ ; partially redundant case define void @func28(i32 signext %a) { ; CHECK-LABEL: @func28 -; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]] +; CHECK: mr. 30, 3 +; CHECK-NOT: cmplwi ; CHECK: .[[LABEL2:[A-Z0-9_]+]]: -; CHECK: cmpwi [[REG1]], [[REG2]] +; CHECK: mr. 30, 3 +; CHECK-NOT: cmpwi ; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]] ; CHECK-NOT: cmp ; CHECK: bne 0, .[[LABEL2]] diff --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll --- a/llvm/test/CodeGen/PowerPC/csr-split.ll +++ b/llvm/test/CodeGen/PowerPC/csr-split.ll @@ -12,8 +12,7 @@ define dso_local signext i32 @test1(i32* %b) local_unnamed_addr { ; CHECK-P10-LABEL: test1: -; CHECK-P10: .localentry test1, 1 -; CHECK-P10-NEXT: # %bb.0: # %entry +; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mflr r0 ; CHECK-P10-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P10-NEXT: .cfi_offset lr, 16 @@ -118,8 +117,7 @@ define dso_local signext i32 @test2(i32* %p1) local_unnamed_addr { ; CHECK-P10-LABEL: test2: -; CHECK-P10: .localentry test2, 1 -; CHECK-P10-NEXT: # %bb.0: # %entry +; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mflr r0 ; CHECK-P10-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P10-NEXT: .cfi_offset lr, 16 @@ -127,9 +125,8 @@ ; CHECK-P10-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P10-NEXT: std r0, 16(r1) ; CHECK-P10-NEXT: stdu r1, -48(r1) -; CHECK-P10-NEXT: mr r30, r3 +; CHECK-P10-NEXT: mr. 
r30, r3 ; CHECK-P10-NEXT: li r3, 0 -; CHECK-P10-NEXT: cmpldi r30, 0 ; CHECK-P10-NEXT: beq cr0, .LBB1_3 ; CHECK-P10-NEXT: # %bb.1: # %if.end ; CHECK-P10-NEXT: plwa r4, a@PCREL(0), 1 @@ -156,9 +153,8 @@ ; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: std r0, 16(r1) ; CHECK-PWR9-NEXT: stdu r1, -48(r1) -; CHECK-PWR9-NEXT: mr r30, r3 +; CHECK-PWR9-NEXT: mr. r30, r3 ; CHECK-PWR9-NEXT: li r3, 0 -; CHECK-PWR9-NEXT: cmpldi r30, 0 ; CHECK-PWR9-NEXT: beq cr0, .LBB1_3 ; CHECK-PWR9-NEXT: # %bb.1: # %if.end ; CHECK-PWR9-NEXT: addis r4, r2, a@toc@ha @@ -234,8 +230,7 @@ define dso_local i8* @test3(i8** nocapture %p1, i8 zeroext %p2) local_unnamed_addr { ; CHECK-P10-LABEL: test3: -; CHECK-P10: .localentry test3, 1 -; CHECK-P10-NEXT: # %bb.0: # %entry +; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mflr r0 ; CHECK-P10-NEXT: .cfi_def_cfa_offset 64 ; CHECK-P10-NEXT: .cfi_offset lr, 16 diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -580,8 +580,7 @@ ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: stxv v2, -32(r1) # 16-byte Folded Spill ; CHECK-O0-NEXT: std r3, -8(r1) # 8-byte Folded Spill -; CHECK-O0-NEXT: mr r3, r7 -; CHECK-O0-NEXT: cmpwi r3, 0 +; CHECK-O0-NEXT: mr. r3, r7 ; CHECK-O0-NEXT: beq cr0, .LBB5_2 ; CHECK-O0-NEXT: # %bb.1: # %if.then ; CHECK-O0-NEXT: xxsetaccz acc0 @@ -641,8 +640,7 @@ ; CHECK-O0-BE: # %bb.0: # %entry ; CHECK-O0-BE-NEXT: stxv v2, -32(r1) # 16-byte Folded Spill ; CHECK-O0-BE-NEXT: std r3, -8(r1) # 8-byte Folded Spill -; CHECK-O0-BE-NEXT: mr r3, r7 -; CHECK-O0-BE-NEXT: cmpwi r3, 0 +; CHECK-O0-BE-NEXT: mr. 
r3, r7 ; CHECK-O0-BE-NEXT: beq cr0, .LBB5_2 ; CHECK-O0-BE-NEXT: # %bb.1: # %if.then ; CHECK-O0-BE-NEXT: xxsetaccz acc0 @@ -1297,9 +1295,8 @@ ; CHECK-O0-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; CHECK-O0-NEXT: std r5, -168(r1) # 8-byte Folded Spill ; CHECK-O0-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-O0-NEXT: mr r3, r4 +; CHECK-O0-NEXT: mr. r3, r4 ; CHECK-O0-NEXT: stw r3, -148(r1) # 4-byte Folded Spill -; CHECK-O0-NEXT: cmpwi r3, 0 ; CHECK-O0-NEXT: ble cr0, .LBB9_2 ; CHECK-O0-NEXT: # %bb.1: # %for.body.preheader ; CHECK-O0-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload @@ -1419,9 +1416,8 @@ ; CHECK-O0-BE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; CHECK-O0-BE-NEXT: std r5, -168(r1) # 8-byte Folded Spill ; CHECK-O0-BE-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-O0-BE-NEXT: mr r3, r4 +; CHECK-O0-BE-NEXT: mr. r3, r4 ; CHECK-O0-BE-NEXT: stw r3, -148(r1) # 4-byte Folded Spill -; CHECK-O0-BE-NEXT: cmpwi r3, 0 ; CHECK-O0-BE-NEXT: ble cr0, .LBB9_2 ; CHECK-O0-BE-NEXT: # %bb.1: # %for.body.preheader ; CHECK-O0-BE-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/optcmp.ll b/llvm/test/CodeGen/PowerPC/optcmp.ll --- a/llvm/test/CodeGen/PowerPC/optcmp.ll +++ b/llvm/test/CodeGen/PowerPC/optcmp.ll @@ -40,20 +40,18 @@ define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 { ; CHECK-LABEL: foo2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slw 4, 3, 4 +; CHECK-NEXT: slw. 4, 3, 4 ; CHECK-NEXT: li 6, 0 ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: cmpwi 4, 0 ; CHECK-NEXT: stw 4, 0(5) ; CHECK-NEXT: iselgt 3, 3, 6 ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: foo2: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: slw 4, 3, 4 +; CHECK-NO-ISEL-NEXT: slw. 
4, 3, 4 ; CHECK-NO-ISEL-NEXT: li 6, 0 ; CHECK-NO-ISEL-NEXT: li 3, 1 -; CHECK-NO-ISEL-NEXT: cmpwi 4, 0 ; CHECK-NO-ISEL-NEXT: stw 4, 0(5) ; CHECK-NO-ISEL-NEXT: bclr 12, 1, 0 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -2947,10 +2947,9 @@ ; LE-P10-O0-NEXT: std r0, 16(r1) ; LE-P10-O0-NEXT: hashst r0, -8(r1) ; LE-P10-O0-NEXT: stdu r1, -64(r1) -; LE-P10-O0-NEXT: mr r4, r3 +; LE-P10-O0-NEXT: mr. r4, r3 ; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill ; LE-P10-O0-NEXT: li r3, 0 -; LE-P10-O0-NEXT: cmpdi r4, 0 ; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill ; LE-P10-O0-NEXT: beq cr0, .LBB2_2 ; LE-P10-O0-NEXT: # %bb.1: # %if.end @@ -2980,10 +2979,9 @@ ; LE-P9-O0-NEXT: std r0, 16(r1) ; LE-P9-O0-NEXT: hashst r0, -8(r1) ; LE-P9-O0-NEXT: stdu r1, -128(r1) -; LE-P9-O0-NEXT: mr r4, r3 +; LE-P9-O0-NEXT: mr. r4, r3 ; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill ; LE-P9-O0-NEXT: li r3, 0 -; LE-P9-O0-NEXT: cmpdi r4, 0 ; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill ; LE-P9-O0-NEXT: beq cr0, .LBB2_2 ; LE-P9-O0-NEXT: # %bb.1: # %if.end @@ -3013,10 +3011,9 @@ ; LE-P8-O0-NEXT: std r0, 16(r1) ; LE-P8-O0-NEXT: hashst r0, -8(r1) ; LE-P8-O0-NEXT: stdu r1, -128(r1) -; LE-P8-O0-NEXT: mr r4, r3 +; LE-P8-O0-NEXT: mr. 
r4, r3 ; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill ; LE-P8-O0-NEXT: li r3, 0 -; LE-P8-O0-NEXT: cmpdi r4, 0 ; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill ; LE-P8-O0-NEXT: beq cr0, .LBB2_2 ; LE-P8-O0-NEXT: # %bb.1: # %if.end diff --git a/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll b/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll --- a/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll +++ b/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test2 define i8 @test2(i32 %a) { entry: -; CHECK: rlwinm [[REG:[0-9]+]], {{[0-9]+}}, 0, 28, 23 -; CHECK: cmplwi [[REG]], 0 +; CHECK: rlwinm. [[REG:[0-9]+]], {{[0-9]+}}, 0, 28, 23 +; CHECK-NOT: cmplwi [[REG]], 0 ; CHECK: beq 0 %0 = and i32 %a, -241 %1 = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -137,8 +137,7 @@ define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: all_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: or. 3, 3, 4 ; CHECK-NEXT: blt 0, .LBB9_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -287,8 +286,7 @@ define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: any_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: and. 
3, 3, 4 ; CHECK-NEXT: blt 0, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 diff --git a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll @@ -358,8 +358,7 @@ define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b_0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slw r5, r3, r4 -; CHECK-NEXT: cmpwi r5, 0 +; CHECK-NEXT: slw. r5, r3, r4 ; CHECK-NEXT: ble cr0, .LBB12_2 ; CHECK-NEXT: # %bb.1: # %return ; CHECK-NEXT: extsw r3, r4