diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -251,7 +251,8 @@ bool isRegElgibleForForwarding(const MachineOperand &RegMO, const MachineInstr &DefMI, const MachineInstr &MI, bool KillDefMI, - bool &IsFwdFeederRegKilled) const; + bool &IsFwdFeederRegKilled, + bool &SeenIntermediateUse) const; unsigned getSpillTarget() const; const unsigned *getStoreOpcodesForSpillArray() const; const unsigned *getLoadOpcodesForSpillArray() const; @@ -644,6 +645,8 @@ int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; + bool optimizeCmpPostRA(MachineInstr &MI) const; + /// Get the base operand and byte offset of an instruction that reads/writes /// memory. bool getMemOperandsWithOffsetWidth( diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2768,6 +2768,85 @@ return true; } +bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const { + MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo(); + if (MRI->isSSA()) + return false; + + Register SrcReg, SrcReg2; + int64_t CmpMask, CmpValue; + if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue)) + return false; + + // Try to optimize the comparison against 0. + if (CmpValue || !CmpMask || SrcReg2) + return false; + + // The record forms set the condition register based on a signed comparison + // with zero (see comments in optimizeCompareInstr). Since we can't do the + // equality checks in post-RA, we are more restricted on an unsigned + // comparison. 
+ unsigned Opc = CmpMI.getOpcode(); + if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI) + return false; + + // The record forms are always based on a 64-bit comparison on PPC64 + // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit + // comparison. Since we can't do the equality checks in post-RA, we bail out + // in this case. + if (Subtarget.isPPC64() && Opc == PPC::CMPWI) + return false; + + // CmpMI can't be deleted if it has an implicit def. + if (CmpMI.hasImplicitDef()) + return false; + + bool SrcRegHasOtherUse = false; + MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse); + if (!SrcMI || !SrcMI->definesRegister(SrcReg)) + return false; + + MachineOperand RegMO = CmpMI.getOperand(0); + Register CRReg = RegMO.getReg(); + if (CRReg != PPC::CR0) + return false; + + // Make sure there is no def/use of CRReg between SrcMI and CmpMI. + bool SeenUseOfCRReg = false; + bool IsCRRegKilled = false; + if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled, + SeenUseOfCRReg) || + SrcMI->definesRegister(CRReg) || SeenUseOfCRReg) + return false; + + int SrcMIOpc = SrcMI->getOpcode(); + int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc); + if (NewOpC == -1) + return false; + + LLVM_DEBUG(dbgs() << "Replace Instr: "); + LLVM_DEBUG(SrcMI->dump()); + + const MCInstrDesc &NewDesc = get(NewOpC); + SrcMI->setDesc(NewDesc); + MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI) + .addReg(CRReg, RegState::ImplicitDefine); + SrcMI->clearRegisterDeads(CRReg); + + // Fix up killed/dead flag for SrcReg after transformation. 
+ if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill()) + fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg); + + assert(SrcMI->definesRegister(PPC::CR0) && + "Record-form instruction does not define cr0?"); + + LLVM_DEBUG(dbgs() << "with: "); + LLVM_DEBUG(SrcMI->dump()); + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(CmpMI.dump()); + return true; +} + bool PPCInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, @@ -4427,7 +4506,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding( const MachineOperand &RegMO, const MachineInstr &DefMI, const MachineInstr &MI, bool KillDefMI, - bool &IsFwdFeederRegKilled) const { + bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const { // x = addi y, imm // ... // z = lfdx 0, x -> z = lfd imm(y) @@ -4449,6 +4528,8 @@ return false; else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) IsFwdFeederRegKilled = true; + if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) + SeenIntermediateUse = true; // Made it to DefMI without encountering a clobber. if ((&*It) == &DefMI) break; @@ -4888,9 +4969,10 @@ return false; bool IsFwdFeederRegKilled = false; + bool SeenIntermediateUse = false; // Check if the RegMO can be forwarded to MI. if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI, - IsFwdFeederRegKilled)) + IsFwdFeederRegKilled, SeenIntermediateUse)) return false; // Get killed info in case fixup needed after transformation. 
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -38,6 +38,8 @@ "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); +STATISTIC(NumCmpsInPreEmit, + "Number of compares eliminated in pre-emit peephole"); static cl::opt EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), @@ -508,6 +510,13 @@ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } + if (TII->optimizeCmpPostRA(MI)) { + Changed = true; + NumCmpsInPreEmit++; + LLVM_DEBUG(dbgs() << "Optimize compare by using record form: "); + LLVM_DEBUG(MI.dump()); + InstrsToErase.push_back(&MI); + } } // Eliminate conditional branch based on a constant CR bit by diff --git a/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir @@ -0,0 +1,142 @@ +# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s + +--- +name: test1 +# The cmp instr is optimized with the record form. +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $x3, $x4 + renamable $x3 = OR8 killed renamable $x3, killed renamable $x4 + renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3 + ; CHECK-LABEL: name: test1 + ; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $cr0 + ; CHECK-NOT: CMPDI + BCC 68, killed renamable $cr0, %bb.2 + + bb.1: + $x3 = LI8 102 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.2: + $x3 = LI8 116 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... 
+ +--- +name: test2 +# The imm of the comparison instr isn't 0. +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $x3, $x4 + renamable $x3 = OR8 killed renamable $x3, killed renamable $x4 + renamable $cr0 = CMPDI renamable $x3, 2, implicit killed $x3 + ; CHECK-LABEL: name: test2 + ; CHECK: CMPDI + BCC 68, killed renamable $cr0, %bb.2 + + bb.1: + $x3 = LI8 102 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.2: + $x3 = LI8 116 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +--- +name: test3 +# The comparison instr has an implicit def. +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $x3, $x4 + renamable $x3 = OR8 killed renamable $x3, killed renamable $x4 + renamable $cr0 = CMPDI renamable $x3, 0, implicit-def $x3 + ; CHECK-LABEL: name: test3 + ; CHECK: CMPDI + BCC 68, killed renamable $cr0, %bb.2 + + bb.1: + $x3 = LI8 102 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.2: + $x3 = LI8 116 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +--- +name: test4 +# There is another use for cr0 between OR8 instr and CMPDI instr. +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $x3, $x4, $cr0 + renamable $x3 = OR8 killed renamable $x3, killed renamable $x4 + renamable $cr1 = MCRF killed $cr0, implicit $x3 + renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1 + ; CHECK-LABEL: name: test4 + ; CHECK: CMPDI + BCC 68, killed renamable $cr0, %bb.2 + + bb.1: + $x3 = LI8 102 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.2: + $x3 = LI8 116 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +--- +name: test5 +# There is another def for cr0 between OR8 instr and CMPDI instr. 
+tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $x3, $x4 + renamable $x3 = OR8 killed renamable $x3, renamable $x4 + renamable $cr1 = CMPD renamable $x3, renamable $x4, implicit-def $cr0 + renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1 + ; CHECK-LABEL: name: test5 + ; CHECK: CMPDI + BCC 68, killed renamable $cr0, %bb.2 + + bb.1: + $x3 = LI8 102 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.2: + $x3 = LI8 116 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +--- +name: test6 +# The CR register defined by the comparison instr isn't CR0. +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $x3, $x4 + renamable $x3 = OR8 killed renamable $x3, killed renamable $x4 + renamable $cr1 = CMPDI renamable $x3, 0, implicit killed $x3 + ; CHECK-LABEL: name: test6 + ; CHECK: CMPDI + BCC 68, killed renamable $cr1, %bb.2 + + bb.1: + $x3 = LI8 102 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + + bb.2: + $x3 = LI8 116 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -2946,10 +2946,9 @@ ; LE-P10-O0-NEXT: std r0, 16(r1) ; LE-P10-O0-NEXT: hashst r0, -8(r1) ; LE-P10-O0-NEXT: stdu r1, -64(r1) -; LE-P10-O0-NEXT: mr r4, r3 +; LE-P10-O0-NEXT: mr. r4, r3 ; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill ; LE-P10-O0-NEXT: li r3, 0 -; LE-P10-O0-NEXT: cmpdi r4, 0 ; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill ; LE-P10-O0-NEXT: beq cr0, .LBB2_2 ; LE-P10-O0-NEXT: # %bb.1: # %if.end @@ -2979,10 +2978,9 @@ ; LE-P9-O0-NEXT: std r0, 16(r1) ; LE-P9-O0-NEXT: hashst r0, -8(r1) ; LE-P9-O0-NEXT: stdu r1, -128(r1) -; LE-P9-O0-NEXT: mr r4, r3 +; LE-P9-O0-NEXT: mr. 
r4, r3 ; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill ; LE-P9-O0-NEXT: li r3, 0 -; LE-P9-O0-NEXT: cmpdi r4, 0 ; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill ; LE-P9-O0-NEXT: beq cr0, .LBB2_2 ; LE-P9-O0-NEXT: # %bb.1: # %if.end @@ -3012,10 +3010,9 @@ ; LE-P8-O0-NEXT: std r0, 16(r1) ; LE-P8-O0-NEXT: hashst r0, -8(r1) ; LE-P8-O0-NEXT: stdu r1, -128(r1) -; LE-P8-O0-NEXT: mr r4, r3 +; LE-P8-O0-NEXT: mr. r4, r3 ; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill ; LE-P8-O0-NEXT: li r3, 0 -; LE-P8-O0-NEXT: cmpdi r4, 0 ; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill ; LE-P8-O0-NEXT: beq cr0, .LBB2_2 ; LE-P8-O0-NEXT: # %bb.1: # %if.end