Index: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -18,6 +18,7 @@
 //
 //===---------------------------------------------------------------------===//
 
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
@@ -806,6 +807,165 @@
         combineSEXTAndSHL(MI, ToErase);
         break;
       }
+      case PPC::RLWINM:
+      case PPC::RLWINMo:
+      case PPC::RLWINM8:
+      case PPC::RLWINM8o: {
+        // Fold a pair of rotate-and-mask instructions, where MI consumes the
+        // result of SrcMI, into a single rotate-and-mask of SrcMI's input.
+        unsigned FoldingReg = MI.getOperand(1).getReg();
+        if (!Register::isVirtualRegister(FoldingReg))
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+        if (SrcMI->getOpcode() != PPC::RLWINM &&
+            SrcMI->getOpcode() != PPC::RLWINMo &&
+            SrcMI->getOpcode() != PPC::RLWINM8 &&
+            SrcMI->getOpcode() != PPC::RLWINM8o)
+          break;
+        assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+                MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+                SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+               "Invalid PPC::RLWINM Instruction!");
+        uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+        uint64_t SHMI = MI.getOperand(2).getImm();
+        uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+        uint64_t MBMI = MI.getOperand(3).getImm();
+        uint64_t MESrc = SrcMI->getOperand(4).getImm();
+        uint64_t MEMI = MI.getOperand(4).getImm();
+
+        assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+               "Invalid PPC::RLWINM Instruction!");
+
+        // If MBMI is bigger than MEMI, we can never get a run of ones.
+        // RotatedSrcMask non-wrap:
+        //                 0........31|32........63
+        // RotatedSrcMask:   B---E        B---E
+        // MaskMI:         -----------|--E  B------
+        // Result:           -----          ---      (Bad candidate)
+        //
+        // RotatedSrcMask wrap:
+        //                 0........31|32........63
+        // RotatedSrcMask: --E   B----|--E    B----
+        // MaskMI:         -----------|--E  B------
+        // Result:         ---   -----|---    -----  (Bad candidate)
+        //
+        // One special case is when RotatedSrcMask is a full set mask.
+        // RotatedSrcMask full:
+        //                 0........31|32........63
+        // RotatedSrcMask: ------EB---|-------EB---
+        // MaskMI:         -----------|--E  B------
+        // Result:         -----------|---  -------  (Good candidate)
+
+        // Mark special case.
+        bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+        // For other MBMI > MEMI cases, just return.
+        if ((MBMI > MEMI) && !SrcMaskFull)
+          break;
+
+        // From here on either MBMI <= MEMI, or SrcMI's mask is full.
+        APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+        // In MI, we only need the low 32 bits of SrcMI, so only consider the
+        // low 32 bits of SrcMI's mask. Note that in APInt, the lowest bit is
+        // at index 0, while in the PowerPC ISA, the lowest bit is at index 63.
+        APInt MaskSrc =
+            APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+        // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is
+        // equal to highBit, which is what MB - ME == 1 maps to. Such a mask
+        // is a full set mask, not an empty one, so fix up both masks.
+        // Leaving MaskMI empty would make FinalMask zero and wrongly fold MI
+        // to a constant zero below.
+        if (MBSrc - MESrc == 1)
+          MaskSrc.setAllBits();
+        if (MBMI - MEMI == 1)
+          MaskMI.setAllBits();
+
+        APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+        APInt FinalMask = RotatedSrcMask & MaskMI;
+        uint32_t NewMB, NewME;
+
+        // SrcMI can be deleted once MI no longer reads FoldingReg, as long as
+        // MI is its only non-debug user and it is not a record form (RLWINMo
+        // and RLWINM8o also define a condition register).
+        bool CanEraseSrc = MRI->hasOneNonDBGUse(FoldingReg) &&
+                           (SrcMI->getOpcode() == PPC::RLWINM ||
+                            SrcMI->getOpcode() == PPC::RLWINM8);
+
+        // If final mask is 0, MI result should be 0 too.
+        if (FinalMask.isNullValue()) {
+          bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
+                          MI.getOpcode() == PPC::RLWINM8o);
+
+          LLVM_DEBUG(dbgs() << "Replace Instr: ");
+          LLVM_DEBUG(MI.dump());
+
+          if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+            // Replace MI with "LI 0"
+            MI.RemoveOperand(4);
+            MI.RemoveOperand(3);
+            MI.RemoveOperand(2);
+            MI.getOperand(1).ChangeToImmediate(0);
+            MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
+            // MI no longer reads FoldingReg, so SrcMI may now be dead.
+            if (CanEraseSrc) {
+              ToErase = SrcMI;
+              LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+              LLVM_DEBUG(SrcMI->dump());
+            }
+          } else {
+            // Replace MI with "ANDIo reg, 0". MI still reads FoldingReg here,
+            // so SrcMI has to stay.
+            MI.RemoveOperand(4);
+            MI.RemoveOperand(3);
+            MI.getOperand(2).setImm(0);
+            MI.setDesc(TII->get(Is64Bit ? PPC::ANDIo8 : PPC::ANDIo));
+          }
+          Simplified = true;
+          NumRotatesCollapsed++;
+
+          LLVM_DEBUG(dbgs() << "With: ");
+          LLVM_DEBUG(MI.dump());
+        } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+                                NewME) &&
+                    NewMB <= NewME) ||
+                   SrcMaskFull) {
+          // A wrapping result (NewMB > NewME) is only acceptable when MI's own
+          // mask is kept, i.e. when SrcMI's mask is full. Otherwise MI's mask
+          // does not wrap, and rewriting it to a wrapping one would turn the
+          // 32-bit result of MI into a 64-bit value.
+          if (CanEraseSrc) {
+            ToErase = SrcMI;
+            LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+            LLVM_DEBUG(SrcMI->dump());
+          }
+
+          LLVM_DEBUG(dbgs() << "Converting Instr: ");
+          LLVM_DEBUG(MI.dump());
+
+          uint16_t NewSH = (SHSrc + SHMI) % 32;
+          MI.getOperand(2).setImm(NewSH);
+          // If SrcMI mask is full, no need to update MBMI and MEMI.
+          if (!SrcMaskFull) {
+            MI.getOperand(3).setImm(NewMB);
+            MI.getOperand(4).setImm(NewME);
+          }
+          MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+          if (SrcMI->getOperand(1).isKill()) {
+            MI.getOperand(1).setIsKill(true);
+            SrcMI->getOperand(1).setIsKill(false);
+          } else
+            // About to replace MI.getOperand(1), clear its kill flag.
+            MI.getOperand(1).setIsKill(false);
+
+          Simplified = true;
+          NumRotatesCollapsed++;
+
+          LLVM_DEBUG(dbgs() << "To: ");
+          LLVM_DEBUG(MI.dump());
+        }
+        break;
+      }
       }
     }
 
Index: llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
+++ llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
@@ -11,8 +11,7 @@
 ; CHECK-NEXT: xori r3, r3, 1
 ; CHECK-NEXT: addis r4, r2, res@toc@ha
 ; CHECK-NEXT: cntlzw r3, r3
-; CHECK-NEXT: srwi r3, r3, 5
-; CHECK-NEXT: slwi r3, r3, 19
+; CHECK-NEXT: rlwinm r3, r3, 14, 0, 12
 ; CHECK-NEXT: stw r3, res@toc@l(r4)
 ; CHECK-NEXT: blr
 entry:
@@ -30,10 +29,10 @@
 ; CHECK-NEXT: addis r4, r2, res2@toc@ha
 ; CHECK-NEXT: addis r6, r2, res@toc@ha
 ; CHECK-NEXT: cntlzw r3, r3
-; CHECK-NEXT: srwi r3, r3, 5
-; CHECK-NEXT: slwi r5, r3, 19
-; CHECK-NEXT: stw r3, res2@toc@l(r4)
-; CHECK-NEXT: stw r5, res@toc@l(r6)
+; CHECK-NEXT: srwi r5, r3, 5
+; CHECK-NEXT: rlwinm r3, r3, 14, 0, 12
+; CHECK-NEXT: stw r5, res2@toc@l(r4)
+; CHECK-NEXT: stw r3, res@toc@l(r6)
 ; CHECK-NEXT: blr
 entry:
   %cmp = icmp eq i32 %var1, 1
Index: llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
@@ -0,0 +1,140 @@
+# RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
+# RUN: -run-pass ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s
+
+---
+name: testFoldRLWINM
+# CHECK: name: testFoldRLWINM
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 5, 31
+    ; CHECK-NOT: %2:gprc = RLWINM %1, 27, 5, 31
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM %1, 14, 0, 12
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMSrcFullMask1
+# CHECK: name: testFoldRLWINMSrcFullMask1
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 0, 31
+    ; CHECK-NOT: %2:gprc = RLWINM %1, 27, 0, 31
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM %1, 14, 0, 12
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMSrcFullMask2
+# CHECK: name: testFoldRLWINMSrcFullMask2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 10, 9
+    ; CHECK-NOT: %2:gprc = RLWINM %1, 27, 10, 9
+    %3:gprc = RLWINM %2:gprc, 19, 10, 1
+    ; CHECK: %3:gprc = RLWINM %1, 14, 10, 1
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMSrcWrapped
+# CHECK: name: testFoldRLWINMSrcWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 30, 10
+    ; CHECK-NOT: %2:gprc = RLWINM %1, 27, 30, 10
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM %1, 14, 11, 12
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMUserWrapped
+# CHECK: name: testFoldRLWINMUserWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 10, 5, 31
+    ; CHECK: %2:gprc = RLWINM %1, 10, 5, 31
+    %3:gprc = RLWINM %2:gprc, 10, 30, 5
+    ; CHECK: %3:gprc = RLWINM %2, 10, 30, 5
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMMultipleUses
+# CHECK: name: testFoldRLWINMMultipleUses
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM killed %1:gprc, 27, 5, 31
+    ; CHECK: %2:gprc = RLWINM %1, 27, 5, 31
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM killed %1, 14, 0, 12
+    STW %3:gprc, %2:gprc, 100
+    ; CHECK: STW %3, %2, 100
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMToZero
+# CHECK: name: testFoldRLWINMToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 5, 10
+    ; CHECK-NOT: %2:gprc = RLWINM %1, 27, 5, 10
+    %3:gprc = RLWINM %2:gprc, 8, 5, 10
+    ; CHECK: %3:gprc = LI 0
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMoToZero
+# CHECK: name: testFoldRLWINMoToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 5, 10
+    ; CHECK: %2:gprc = RLWINM %1, 27, 5, 10
+    %3:gprc = RLWINMo %2:gprc, 8, 5, 10, implicit-def $cr0
+    ; CHECK: %3:gprc = ANDIo %2, 0, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMInvalidMask
+# CHECK: name: testFoldRLWINMInvalidMask
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 20, 5, 31
+    ; CHECK: %2:gprc = RLWINM %1, 20, 5, 31
+    %3:gprc = RLWINM %2:gprc, 19, 10, 20
+    ; CHECK: %3:gprc = RLWINM %2, 19, 10, 20
+    BLR8 implicit $lr8, implicit $rm
+...