diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -634,6 +634,8 @@ int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; + bool optimizeCmpPostRA(MachineInstr &MI) const; + /// Get the base operand and byte offset of an instruction that reads/writes /// memory. bool getMemOperandsWithOffsetWidth( diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2766,6 +2766,71 @@ return true; } +bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const { + MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo(); + if (MRI->isSSA()) + return false; + + Register SrcReg, SrcReg2; + int64_t CmpMask, CmpValue; + if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue) || + SrcReg.isVirtual() || SrcReg2.isVirtual() || SrcReg2 || CmpValue) + return false; + + // CmpMI can't be deleted if it has implicit def. + if (CmpMI.hasImplicitDef()) + return false; + + bool OtherIntermediateUse = false; + MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, OtherIntermediateUse); + if (OtherIntermediateUse || !SrcMI || SrcMI->getParent() != CmpMI.getParent()) + return false; + + bool IsFwdFeederRegKilled = false; + MachineOperand RegMO = CmpMI.getOperand(0); + Register CRReg = RegMO.getReg(); + if (CRReg != PPC::CR0) + return false; + + // Make sure there is no definition of CRReg between SrcMI and CmpMI. + if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, + IsFwdFeederRegKilled) || + SrcMI->definesRegister(CRReg)) + return false; + + int OldOpc = SrcMI->getOpcode(); + int NewOpC = PPC::getRecordFormOpcode(OldOpc); + if (NewOpC == -1 || OldOpc == NewOpC) + return false; + + for (auto &CompareUseMI : MRI->use_instructions(CRReg)) { + unsigned UseOpc = CompareUseMI.getOpcode(); + if (UseOpc != PPC::BCC) + return false; + } + + LLVM_DEBUG(dbgs() << "Replace Instr: "); + LLVM_DEBUG(SrcMI->dump()); + + const MCInstrDesc &NewDesc = get(NewOpC); + SrcMI->setDesc(NewDesc); + MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI) + .addReg(CRReg, RegState::ImplicitDefine); + + // Fix up killed/dead flag after transformation. + if (IsFwdFeederRegKilled || RegMO.isKill()) + fixupIsDeadOrKill(SrcMI, &CmpMI, CRReg); + + assert(SrcMI->definesRegister(CRReg) && + "Record-form instruction does not define cr0?"); + + LLVM_DEBUG(dbgs() << "with: "); + LLVM_DEBUG(SrcMI->dump()); + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(CmpMI.dump()); + return true; +} + bool PPCInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -38,6 +38,8 @@ "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); +STATISTIC(NumCmpsInPreEmit, + "Number of compares eliminated in pre-emit peephole"); static cl::opt EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), @@ -508,6 +510,13 @@ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } + if (TII->optimizeCmpPostRA(MI)) { + Changed = true; + NumCmpsInPreEmit++; + LLVM_DEBUG(dbgs() << "Optimize compare by using record form: "); + LLVM_DEBUG(MI.dump()); + InstrsToErase.push_back(&MI); + } } // Eliminate conditional branch based on a constant CR bit by diff --git a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll --- a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll +++ b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll @@ -55,8 +55,7 @@ ; CHECK-NEXT: # %bb.3: # %invcont23 ; CHECK-NEXT: # ; CHECK-NEXT: ld 3, 128(31) -; CHECK-NEXT: sub 30, 30, 3 -; CHECK-NEXT: cmpldi 30, 0 +; CHECK-NEXT: sub. 30, 30, 3 ; CHECK-NEXT: bne 0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %cleanup ; CHECK-NEXT: ld 30, 160(31) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll --- a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll +++ b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll @@ -717,9 +717,11 @@ ; partially redundant case define void @func28(i32 signext %a) { ; CHECK-LABEL: @func28 -; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]] +; CHECK: mr. 30, 3 +; CHECK-NOT: cmplwi ; CHECK: .[[LABEL2:[A-Z0-9_]+]]: -; CHECK: cmpwi [[REG1]], [[REG2]] +; CHECK: mr. 30, 3 +; CHECK-NOT: cmpwi ; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]] ; CHECK-NOT: cmp ; CHECK: bne 0, .[[LABEL2]] diff --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll --- a/llvm/test/CodeGen/PowerPC/csr-split.ll +++ b/llvm/test/CodeGen/PowerPC/csr-split.ll @@ -97,9 +97,8 @@ ; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: std r0, 16(r1) ; CHECK-PWR9-NEXT: stdu r1, -48(r1) -; CHECK-PWR9-NEXT: mr r30, r3 +; CHECK-PWR9-NEXT: mr. r30, r3 ; CHECK-PWR9-NEXT: li r3, 0 -; CHECK-PWR9-NEXT: cmpldi r30, 0 ; CHECK-PWR9-NEXT: beq cr0, .LBB1_3 ; CHECK-PWR9-NEXT: # %bb.1: # %if.end ; CHECK-PWR9-NEXT: addis r4, r2, a@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll b/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll --- a/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll +++ b/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test2 define i8 @test2(i32 %a) { entry: -; CHECK: rlwinm [[REG:[0-9]+]], {{[0-9]+}}, 0, 28, 23 -; CHECK: cmplwi [[REG]], 0 +; CHECK: rlwinm. [[REG:[0-9]+]], {{[0-9]+}}, 0, 28, 23 +; CHECK-NOT: cmplwi [[REG]], 0 ; CHECK: beq 0 %0 = and i32 %a, -241 %1 = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -137,8 +137,7 @@ define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: all_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: or. 3, 3, 4 ; CHECK-NEXT: blt 0, .LBB9_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -287,8 +286,7 @@ define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: any_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: and. 3, 3, 4 ; CHECK-NEXT: blt 0, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4