diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -336,7 +336,8 @@
   bool isRegElgibleForForwarding(const MachineOperand &RegMO,
                                  const MachineInstr &DefMI,
                                  const MachineInstr &MI, bool KillDefMI,
-                                 bool &IsFwdFeederRegKilled) const;
+                                 bool &IsFwdFeederRegKilled,
+                                 bool &SeenIntermediateUse) const;
   unsigned getSpillTarget() const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
@@ -728,6 +729,12 @@
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;
 
+  /// Try to replace a post-RA compare against zero with the record form of
+  /// the instruction that defines the compared register. Returns true when
+  /// the defining instruction has been rewritten; the compare itself is not
+  /// erased here and must be removed by the caller.
+  bool optimizeCmpPostRA(MachineInstr &MI) const;
+
   /// Get the base operand and byte offset of an instruction that reads/writes
   /// memory.
   bool getMemOperandsWithOffsetWidth(
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2768,6 +2768,95 @@
   return true;
 }
 
+/// Fold a post-RA compare-with-zero into the instruction that defines its
+/// source register by converting that instruction to its record form, which
+/// defines CR0 as a side effect.
+///
+/// \param CmpMI the candidate compare instruction.
+/// \returns true if the defining instruction was rewritten to its record
+/// form. \p CmpMI is not erased here; the caller is expected to remove it.
+bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
+  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
+  if (MRI->isSSA())
+    return false;
+
+  Register SrcReg, SrcReg2;
+  int64_t CmpMask, CmpValue;
+  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
+    return false;
+
+  // Try to optimize the comparison against 0.
+  if (CmpValue || !CmpMask || SrcReg2)
+    return false;
+
+  // The record forms set the condition register based on a signed comparison
+  // with zero (see comments in optimizeCompareInstr). Since we can't do the
+  // equality checks in post-RA, we are more restricted on an unsigned
+  // comparison. The opcode alone identifies the unsigned compares, so no
+  // subtarget check is needed.
+  unsigned Opc = CmpMI.getOpcode();
+  if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
+    return false;
+
+  // NOTE(review): CMPWI is a 32-bit comparison, while the record form of a
+  // 64-bit instruction (e.g. OR8 -> OR8_rec) presumably compares the full
+  // 64-bit result with zero. Confirm that folding CMPWI on PPC64 is safe, or
+  // bail out here when Subtarget.isPPC64() && Opc == PPC::CMPWI.
+
+  // CmpMI can't be deleted if it has an implicit def.
+  if (CmpMI.hasImplicitDef())
+    return false;
+
+  // The record form defines CR0 (see the assert below), so the compare must
+  // target CR0 as well. Check this before looking up the def of SrcReg.
+  const MachineOperand &RegMO = CmpMI.getOperand(0);
+  Register CRReg = RegMO.getReg();
+  if (CRReg != PPC::CR0)
+    return false;
+
+  bool SrcRegHasOtherUse = false;
+  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
+  if (!SrcMI)
+    return false;
+
+  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
+  bool SeenUseOfCRReg = false;
+  bool IsCRRegKilled = false;
+  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
+                                 SeenUseOfCRReg) ||
+      SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
+    return false;
+
+  int SrcMIOpc = SrcMI->getOpcode();
+  int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
+  if (NewOpC == -1)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Replace Instr: ");
+  LLVM_DEBUG(SrcMI->dump());
+
+  // Switch SrcMI to its record form and add CRReg as an implicit def that is
+  // not marked dead, since the users of CmpMI's result will now read it.
+  const MCInstrDesc &NewDesc = get(NewOpC);
+  SrcMI->setDesc(NewDesc);
+  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
+      .addReg(CRReg, RegState::ImplicitDefine);
+  SrcMI->clearRegisterDeads(CRReg);
+
+  // Fix up killed/dead flag for SrcReg after transformation.
+  if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill())
+    fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);
+
+  assert(SrcMI->definesRegister(PPC::CR0) &&
+         "Record-form instruction does not define cr0?");
+
+  LLVM_DEBUG(dbgs() << "with: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+  LLVM_DEBUG(CmpMI.dump());
+  return true;
+}
+
 bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl &BaseOps,
     int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -4469,7 +4558,7 @@
 bool PPCInstrInfo::isRegElgibleForForwarding(
     const MachineOperand &RegMO, const MachineInstr &DefMI,
     const MachineInstr &MI, bool KillDefMI,
-    bool &IsFwdFeederRegKilled) const {
+    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
   // x = addi y, imm
   // ...
   // z = lfdx 0, x -> z = lfd imm(y)
@@ -4491,6 +4580,8 @@
       return false;
     else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
       IsFwdFeederRegKilled = true;
+    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+      SeenIntermediateUse = true;
     // Made it to DefMI without encountering a clobber.
     if ((&*It) == &DefMI)
       break;
@@ -4930,9 +5021,10 @@
     return false;
 
   bool IsFwdFeederRegKilled = false;
+  bool SeenIntermediateUse = false;
   // Check if the RegMO can be forwarded to MI.
   if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
-                                 IsFwdFeederRegKilled))
+                                 IsFwdFeederRegKilled, SeenIntermediateUse))
     return false;
 
   // Get killed info in case fixup needed after transformation.
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -38,6 +38,8 @@
           "Number of self copy instructions eliminated");
 STATISTIC(NumFrameOffFoldInPreEmit,
           "Number of folding frame offset by using r+r in pre-emit peephole");
+STATISTIC(NumCmpsInPreEmit,
+          "Number of compares eliminated in pre-emit peephole");
 
 static cl::opt
 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
@@ -508,6 +510,13 @@
           LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
           LLVM_DEBUG(MI.dump());
         }
+        if (TII->optimizeCmpPostRA(MI)) {
+          Changed = true;
+          ++NumCmpsInPreEmit;
+          LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
+          LLVM_DEBUG(MI.dump());
+          InstrsToErase.push_back(&MI);
+        }
       }
 
       // Eliminate conditional branch based on a constant CR bit by
diff --git a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll
--- a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll
+++ b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll
@@ -719,7 +719,8 @@
 ; CHECK-LABEL: @func28
 ; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]]
 ; CHECK: .[[LABEL2:[A-Z0-9_]+]]:
-; CHECK: 
cmpwi [[REG1]], [[REG2]] +; CHECK: mr. 30, 3 +; CHECK-NOT: cmpwi ; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]] ; CHECK-NOT: cmp ; CHECK: bne 0, .[[LABEL2]] diff --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll --- a/llvm/test/CodeGen/PowerPC/csr-split.ll +++ b/llvm/test/CodeGen/PowerPC/csr-split.ll @@ -12,8 +12,7 @@ define dso_local signext i32 @test1(i32* %b) local_unnamed_addr { ; CHECK-P10-LABEL: test1: -; CHECK-P10: .localentry test1, 1 -; CHECK-P10-NEXT: # %bb.0: # %entry +; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mflr r0 ; CHECK-P10-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P10-NEXT: .cfi_offset lr, 16 @@ -118,8 +117,7 @@ define dso_local signext i32 @test2(i32* %p1) local_unnamed_addr { ; CHECK-P10-LABEL: test2: -; CHECK-P10: .localentry test2, 1 -; CHECK-P10-NEXT: # %bb.0: # %entry +; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mflr r0 ; CHECK-P10-NEXT: .cfi_def_cfa_offset 48 ; CHECK-P10-NEXT: .cfi_offset lr, 16 @@ -231,8 +229,7 @@ define dso_local i8* @test3(i8** nocapture %p1, i8 zeroext %p2) local_unnamed_addr { ; CHECK-P10-LABEL: test3: -; CHECK-P10: .localentry test3, 1 -; CHECK-P10-NEXT: # %bb.0: # %entry +; CHECK-P10: # %bb.0: # %entry ; CHECK-P10-NEXT: mflr r0 ; CHECK-P10-NEXT: .cfi_def_cfa_offset 64 ; CHECK-P10-NEXT: .cfi_offset lr, 16 diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -580,8 +580,7 @@ ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: stxv v2, -32(r1) # 16-byte Folded Spill ; CHECK-O0-NEXT: std r3, -8(r1) # 8-byte Folded Spill -; CHECK-O0-NEXT: mr r3, r7 -; CHECK-O0-NEXT: cmpwi r3, 0 +; CHECK-O0-NEXT: mr. 
r3, r7 ; CHECK-O0-NEXT: beq cr0, .LBB5_2 ; CHECK-O0-NEXT: # %bb.1: # %if.then ; CHECK-O0-NEXT: xxsetaccz acc0 @@ -641,8 +640,7 @@ ; CHECK-O0-BE: # %bb.0: # %entry ; CHECK-O0-BE-NEXT: stxv v2, -32(r1) # 16-byte Folded Spill ; CHECK-O0-BE-NEXT: std r3, -8(r1) # 8-byte Folded Spill -; CHECK-O0-BE-NEXT: mr r3, r7 -; CHECK-O0-BE-NEXT: cmpwi r3, 0 +; CHECK-O0-BE-NEXT: mr. r3, r7 ; CHECK-O0-BE-NEXT: beq cr0, .LBB5_2 ; CHECK-O0-BE-NEXT: # %bb.1: # %if.then ; CHECK-O0-BE-NEXT: xxsetaccz acc0 @@ -1297,9 +1295,8 @@ ; CHECK-O0-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; CHECK-O0-NEXT: std r5, -168(r1) # 8-byte Folded Spill ; CHECK-O0-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-O0-NEXT: mr r3, r4 +; CHECK-O0-NEXT: mr. r3, r4 ; CHECK-O0-NEXT: stw r3, -148(r1) # 4-byte Folded Spill -; CHECK-O0-NEXT: cmpwi r3, 0 ; CHECK-O0-NEXT: ble cr0, .LBB9_2 ; CHECK-O0-NEXT: # %bb.1: # %for.body.preheader ; CHECK-O0-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload @@ -1419,9 +1416,8 @@ ; CHECK-O0-BE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill ; CHECK-O0-BE-NEXT: std r5, -168(r1) # 8-byte Folded Spill ; CHECK-O0-BE-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-O0-BE-NEXT: mr r3, r4 +; CHECK-O0-BE-NEXT: mr. 
r3, r4
 ; CHECK-O0-BE-NEXT: stw r3, -148(r1) # 4-byte Folded Spill
-; CHECK-O0-BE-NEXT: cmpwi r3, 0
 ; CHECK-O0-BE-NEXT: ble cr0, .LBB9_2
 ; CHECK-O0-BE-NEXT: # %bb.1: # %for.body.preheader
 ; CHECK-O0-BE-NEXT: lwz r3, -148(r1) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
@@ -0,0 +1,119 @@
+# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s
+
+---
+name: test1
+# The compare against zero is folded into the record form of OR8.
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr0 = CMPWI renamable $r3, 0, implicit killed $x3
+    ; CHECK-LABEL: name: test1
+    ; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $r3, implicit-def $cr0
+    ; CHECK-NOT: CMPWI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test2
+# The imm of the comparison instr isn't 0.
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr0 = CMPWI renamable $r3, 2, implicit killed $x3
+    ; CHECK-LABEL: name: test2
+    ; CHECK: CMPWI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test3
+# The comparison instr has an implicit def.
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr0 = CMPWI renamable $r3, 0, implicit-def $x3
+    ; CHECK-LABEL: name: test3
+    ; CHECK: CMPWI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test4
+# There is another use for cr0 between OR8 instr and CMPWI instr.
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4, $cr0
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr1 = MCRF killed $cr0, implicit $x3
+    renamable $cr0 = CMPWI renamable $r3, 0, implicit killed $x3, implicit $cr1
+    ; CHECK-LABEL: name: test4
+    ; CHECK: CMPWI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test5
+# There is another def for cr0 between OR8 instr and CMPWI instr.
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, renamable $x4
+    renamable $cr1 = CMPW renamable $r3, renamable $r4, implicit $x4, implicit-def $cr0
+    renamable $cr0 = CMPWI renamable $r3, 0, implicit killed $x3, implicit $cr1
+    ; CHECK-LABEL: name: test5
+    ; CHECK: CMPWI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
diff --git a/llvm/test/CodeGen/PowerPC/optcmp.ll b/llvm/test/CodeGen/PowerPC/optcmp.ll --- a/llvm/test/CodeGen/PowerPC/optcmp.ll +++ b/llvm/test/CodeGen/PowerPC/optcmp.ll @@ -36,20 +36,18 @@ define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 { ; CHECK-LABEL: foo2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slw 4, 3, 4 +; CHECK-NEXT: slw. 4, 3, 4 ; CHECK-NEXT: li 6, 0 ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: cmpwi 4, 0 ; CHECK-NEXT: stw 4, 0(5) ; CHECK-NEXT: iselgt 3, 3, 6 ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: foo2: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: slw 4, 3, 4 +; CHECK-NO-ISEL-NEXT: slw. 4, 3, 4 ; CHECK-NO-ISEL-NEXT: li 6, 0 ; CHECK-NO-ISEL-NEXT: li 3, 1 -; CHECK-NO-ISEL-NEXT: cmpwi 4, 0 ; CHECK-NO-ISEL-NEXT: stw 4, 0(5) ; CHECK-NO-ISEL-NEXT: bclr 12, 1, 0 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -2947,10 +2947,9 @@ ; LE-P10-O0-NEXT: std r0, 16(r1) ; LE-P10-O0-NEXT: hashst r0, -8(r1) ; LE-P10-O0-NEXT: stdu r1, -64(r1) -; LE-P10-O0-NEXT: mr r4, r3 +; LE-P10-O0-NEXT: mr. r4, r3 ; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill ; LE-P10-O0-NEXT: li r3, 0 -; LE-P10-O0-NEXT: cmpdi r4, 0 ; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill ; LE-P10-O0-NEXT: beq cr0, .LBB2_2 ; LE-P10-O0-NEXT: # %bb.1: # %if.end @@ -2980,10 +2979,9 @@ ; LE-P9-O0-NEXT: std r0, 16(r1) ; LE-P9-O0-NEXT: hashst r0, -8(r1) ; LE-P9-O0-NEXT: stdu r1, -128(r1) -; LE-P9-O0-NEXT: mr r4, r3 +; LE-P9-O0-NEXT: mr. 
r4, r3 ; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill ; LE-P9-O0-NEXT: li r3, 0 -; LE-P9-O0-NEXT: cmpdi r4, 0 ; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill ; LE-P9-O0-NEXT: beq cr0, .LBB2_2 ; LE-P9-O0-NEXT: # %bb.1: # %if.end @@ -3013,10 +3011,9 @@ ; LE-P8-O0-NEXT: std r0, 16(r1) ; LE-P8-O0-NEXT: hashst r0, -8(r1) ; LE-P8-O0-NEXT: stdu r1, -128(r1) -; LE-P8-O0-NEXT: mr r4, r3 +; LE-P8-O0-NEXT: mr. r4, r3 ; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill ; LE-P8-O0-NEXT: li r3, 0 -; LE-P8-O0-NEXT: cmpdi r4, 0 ; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill ; LE-P8-O0-NEXT: beq cr0, .LBB2_2 ; LE-P8-O0-NEXT: # %bb.1: # %if.end diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -137,8 +137,7 @@ define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: all_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: or. 3, 3, 4 ; CHECK-NEXT: blt 0, .LBB9_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -287,8 +286,7 @@ define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: any_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: and. 3, 3, 4 ; CHECK-NEXT: blt 0, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 diff --git a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll @@ -358,8 +358,7 @@ define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b_0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slw r5, r3, r4 -; CHECK-NEXT: cmpwi r5, 0 +; CHECK-NEXT: slw. 
r5, r3, r4 ; CHECK-NEXT: ble cr0, .LBB12_2 ; CHECK-NEXT: # %bb.1: # %return ; CHECK-NEXT: extsw r3, r4