diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -564,7 +564,8 @@ bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; bool foldFrameOffset(MachineInstr &MI) const; - bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const; + bool simplifyRotateAndMaskInstr(MachineInstr &MI, + MachineInstr *&ToErase) const; bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3189,18 +3189,54 @@ return false; } -bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, - MachineInstr **ToErase) const { +// This function tries to combine two RLWINMs. We not only perform such +// optimization in SSA, but also after RA, since some RLWINM is generated after +// RA. +bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, + MachineInstr *&ToErase) const { + bool Is64Bit = false; + switch (MI.getOpcode()) { + case PPC::RLWINM: + case PPC::RLWINM_rec: + break; + case PPC::RLWINM8: + case PPC::RLWINM8_rec: + Is64Bit = true; + break; + default: + return false; + } MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); - unsigned FoldingReg = MI.getOperand(1).getReg(); - if (!Register::isVirtualRegister(FoldingReg)) + Register FoldingReg = MI.getOperand(1).getReg(); + MachineInstr *SrcMI = nullptr; + bool NoUse = false; + if (MRI->isSSA()) { + if (!Register::isVirtualRegister(FoldingReg)) + return false; + SrcMI = MRI->getVRegDef(FoldingReg); + } else { + if (!Register::isPhysicalRegister(FoldingReg)) + return false; + bool OtherIntermediateUse = false; + SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse); + NoUse = !OtherIntermediateUse && MI.getOperand(1).isKill(); + } + if (!SrcMI) return false; - MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); - if (SrcMI->getOpcode() != PPC::RLWINM && - SrcMI->getOpcode() != PPC::RLWINM_rec && - SrcMI->getOpcode() != PPC::RLWINM8 && - SrcMI->getOpcode() != PPC::RLWINM8_rec) + switch (SrcMI->getOpcode()) { + case PPC::RLWINM: + case PPC::RLWINM_rec: + if (Is64Bit) + return false; + break; + case PPC::RLWINM8: + case PPC::RLWINM8_rec: + if (!Is64Bit) + return false; + break; + default: return false; + } assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && @@ -3255,8 +3291,6 @@ // If final mask is 0, MI result should be 0 too. if (FinalMask.isNullValue()) { - bool Is64Bit = - (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec); Simplified = true; LLVM_DEBUG(dbgs() << "Replace Instr: "); LLVM_DEBUG(MI.dump()); @@ -3314,14 +3348,15 @@ LLVM_DEBUG(dbgs() << "To: "); LLVM_DEBUG(MI.dump()); } - if (Simplified & MRI->use_nodbg_empty(FoldingReg) && - !SrcMI->hasImplicitDef()) { - // If FoldingReg has no non-debug use and it has no implicit def (it - // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. - // Otherwise keep it. - *ToErase = SrcMI; - LLVM_DEBUG(dbgs() << "Delete dead instruction: "); - LLVM_DEBUG(SrcMI->dump()); + if (Simplified && !SrcMI->hasImplicitDef()) { + // If SrcMI has no implicit def, and FoldingReg has no non-debug use or + // its flag is "killed", it's safe to delete SrcMI. Otherwise keep it. + if ((!MRI->isSSA() && NoUse) || + (MRI->isSSA() && MRI->use_nodbg_empty(FoldingReg))) { + ToErase = SrcMI; + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(SrcMI->dump()); + } } return Simplified; } diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -848,7 +848,7 @@ case PPC::RLWINM_rec: case PPC::RLWINM8: case PPC::RLWINM8_rec: { - Simplified = TII->combineRLWINM(MI, &ToErase); + Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase); if (Simplified) ++NumRotatesCollapsed; break; diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -37,6 +37,8 @@ "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); +STATISTIC(NumRotateInstrFoldInPreEmit, + "Number of folding Rotate instructions in pre-emit peephole"); static cl::opt EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), @@ -413,6 +415,13 @@ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } + MachineInstr *ToErase = nullptr; + if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) { + Changed = true; + NumRotateInstrFoldInPreEmit++; + if (ToErase) + InstrsToErase.push_back(ToErase); + } } // Eliminate conditional branch based on a constant CR bit by diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir --- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir +++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \ +# RUN: ppc-pre-emit-peephole %s -o - | FileCheck %s + +--- +name: testFoldRLWINM +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINM + ; CHECK: liveins: $r3 + ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + $r3 = RLWINM killed $r3, 27, 5, 31 + dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 +... +--- +name: testFoldRLWINMSrcFullMask +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask + ; CHECK: liveins: $r3 + ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + $r3 = RLWINM killed $r3, 27, 0, 31 + dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 +... +--- +name: testFoldRLWINMSrcWrapped +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINMSrcWrapped + ; CHECK: liveins: $r3 + ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + $r3 = RLWINM killed $r3, 27, 30, 10 + dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 +... +--- +name: testFoldRLWINMToZero +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINMToZero + ; CHECK: liveins: $r3 + ; CHECK: renamable $r3 = LI 0, implicit-def $x3 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + $r3 = RLWINM killed $r3, 27, 5, 10 + dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 +... +--- +name: testFoldRLWINM_recToZero +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINM_recToZero + ; CHECK: liveins: $r3 + ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 + $r3 = RLWINM killed $r3, 27, 5, 10 + dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 + BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 +... +--- +name: testFoldRLWINMoToZeroSrcCanNotBeDeleted +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINMoToZeroSrcCanNotBeDeleted + ; CHECK: liveins: $r3 + ; CHECK: $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def dead $cr0 + ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 + $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0 + dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 + BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 +... +--- +name: testFoldRLWINMInvalidMask +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r3 + ; CHECK-LABEL: name: testFoldRLWINMInvalidMask + ; CHECK: liveins: $r3 + ; CHECK: $r3 = RLWINM killed $r3, 20, 5, 31 + ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + $r3 = RLWINM killed $r3, 20, 5, 31 + dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 +... diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-5.ll b/llvm/test/CodeGen/PowerPC/sms-phi-5.ll --- a/llvm/test/CodeGen/PowerPC/sms-phi-5.ll +++ b/llvm/test/CodeGen/PowerPC/sms-phi-5.ll @@ -14,9 +14,8 @@ ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: lhz 3, 0(3) ; CHECK-NEXT: slwi 3, 3, 15 -; CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: rlwinm 4, 3, 31, 17, 31 -; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: ori 3, 4, 0 ; CHECK-NEXT: rlwimi 3, 3, 15, 0, 16 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll --- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll @@ -131,8 +131,7 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvtdivdp cr0, v2, v3 ; CHECK-NEXT: mfocrf r3, 128 -; CHECK-NEXT: srwi r3, r3, 28 -; CHECK-NEXT: rlwinm r3, r3, 28, 31, 31 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)