diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3231,18 +3231,20 @@
   return false;
 }
 
-// This function tries to combine two RLWINMs. We not only perform such
-// optimization in SSA, but also after RA, since some RLWINM is generated after
-// RA.
+// This function tries to combine RLWINM with RLWINM/ANDI. We perform this
+// optimization not only in SSA but also after RA, since some RLWINMs are
+// generated after RA.
 bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI,
                                               MachineInstr *&ToErase) const {
   bool Is64Bit = false;
   switch (MI.getOpcode()) {
   case PPC::RLWINM:
   case PPC::RLWINM_rec:
+  case PPC::ANDI_rec:
     break;
   case PPC::RLWINM8:
   case PPC::RLWINM8_rec:
+  case PPC::ANDI8_rec:
     Is64Bit = true;
     break;
   default:
@@ -3279,28 +3281,74 @@
   default:
     return false;
   }
+  MachineOperand ForwardRegOp = SrcMI->getOperand(1);
+  Register ForwardReg = ForwardRegOp.getReg();
   if (MRI->isSSA()) {
     CanErase = !SrcMI->hasImplicitDef() && MRI->hasOneNonDBGUse(FoldingReg);
   } else {
-    CanErase = !OtherIntermediateUse && MI.getOperand(1).isKill() &&
-               !SrcMI->hasImplicitDef();
+    bool KillFwdDefMI = !OtherIntermediateUse && MI.getOperand(1).isKill();
+    CanErase = KillFwdDefMI && !SrcMI->hasImplicitDef();
     // In post-RA, if SrcMI also defines the register to be forwarded, we can
     // only do the folding if SrcMI is going to be erased.
-    if (!CanErase && SrcMI->definesRegister(SrcMI->getOperand(1).getReg()))
+    if (!CanErase && SrcMI->definesRegister(ForwardReg))
+      return false;
+    bool IsFwdFeederRegKilled = false;
+    // Check if the SrcReg can be forwarded to MI.
+    if (!isRegElgibleForForwarding(ForwardRegOp, *SrcMI, MI, KillFwdDefMI,
+                                   IsFwdFeederRegKilled))
       return false;
   }
-  assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
-          MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
-          SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
-         "Invalid PPC::RLWINM Instruction!");
-  uint64_t SHSrc = SrcMI->getOperand(2).getImm();
-  uint64_t SHMI = MI.getOperand(2).getImm();
-  uint64_t MBSrc = SrcMI->getOperand(3).getImm();
-  uint64_t MBMI = MI.getOperand(3).getImm();
-  uint64_t MESrc = SrcMI->getOperand(4).getImm();
-  uint64_t MEMI = MI.getOperand(4).getImm();
-  assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+  assert((SrcMI->getOperand(2).isImm() && SrcMI->getOperand(3).isImm() &&
+          SrcMI->getOperand(4).isImm()) &&
          "Invalid PPC::RLWINM Instruction!");
+  uint32_t SHSrc = SrcMI->getOperand(2).getImm();
+  uint32_t MBSrc = SrcMI->getOperand(3).getImm();
+  uint32_t MESrc = SrcMI->getOperand(4).getImm();
+  assert((MESrc < 32 && MBSrc < 32) && "Invalid PPC::RLWINM Instruction!");
+  // Note that in APInt, the lowest bit is at index 0, while in the PowerPC
+  // ISA, the lowest bit is at index 63.
+  APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+  // Mark the cases where SrcMI's mask is full.
+  bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+  bool Simplified = false;
+  uint32_t SHMI, MBMI, MEMI, NewMB, NewME, FinalMask;
+  // Pattern 1: RLWINM_ + RLWINM_
+  if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM_rec ||
+      MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec) {
+    assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+            MI.getOperand(4).isImm()) &&
+           "Invalid PPC::RLWINM Instruction!");
+    SHMI = MI.getOperand(2).getImm();
+    MBMI = MI.getOperand(3).getImm();
+    MEMI = MI.getOperand(4).getImm();
+    assert((MEMI < 32 && MBMI < 32) && "Invalid PPC::RLWINM Instruction!");
+
+    // For other MBMI > MEMI cases, just return.
+    if ((MBMI > MEMI) && !SrcMaskFull)
+      return false;
+
+    // Handle MBMI <= MEMI cases.
+    // In MI, we only need the low 32 bits of SrcMI, so only consider the
+    // low 32 bits of SrcMI's mask.
+    APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+    APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+    FinalMask = (RotatedSrcMask & MaskMI).getZExtValue();
+  }
+  // Pattern 2: RLWINM_ + ANDI_
+  else {
+    assert(MI.getOperand(2).isImm() && "Invalid PPC::ANDI_rec Instruction!");
+    uint32_t AndImm = MI.getOperand(2).getImm();
+    assert(isUIntN(16, AndImm) && "Invalid PPC::ANDI_rec Instruction!");
+    // We can treat ANDI_rec as RLWINM_rec with SH = 0 if AndImm is a
+    // non-empty sequence of ones with the remaining bits zero (isRunOfOnes).
+    SHMI = 0;
+    FinalMask = MaskSrc.getZExtValue() & AndImm;
+    // If AndImm is not a run of ones, we can only do the folding when
+    // FinalMask equals zero.
+    if (!isShiftedMask_32(AndImm) && FinalMask != 0)
+      return false;
+  }
   // If MBMI is bigger than MEMI, we always can not get run of ones.
   // RotatedSrcMask non-wrap:
   //            0........31|32........63
   // ROTATE:      B    E        B    E
   // MaskMI:    -----------|--E  B------
   // Result:    -----------|---  ------- (Good candidate)
@@ -3321,27 +3369,8 @@
   // MaskMI:    -----------|--E  B------
   // Result:    -----------|---  ------- (Good candidate)
 
-  // Mark special case.
-  bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
-
-  // For other MBMI > MEMI cases, just return.
-  if ((MBMI > MEMI) && !SrcMaskFull)
-    return false;
-
-  // Handle MBMI <= MEMI cases.
-  APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
-  // In MI, we only need low 32 bits of SrcMI, just consider about low 32
-  // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
-  // while in PowerPC ISA, lowerest bit is at index 63.
-  APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
-
-  APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
-  APInt FinalMask = RotatedSrcMask & MaskMI;
-  uint32_t NewMB, NewME;
-  bool Simplified = false;
   // If final mask is 0, MI result should be 0 too.
-  if (FinalMask.isNullValue()) {
+  if (FinalMask == 0) {
     Simplified = true;
     LLVM_DEBUG(dbgs() << "Replace Instr: ");
     LLVM_DEBUG(MI.dump());
@@ -3355,11 +3384,14 @@
       MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
     } else {
       // Replace MI with "ANDI_rec reg, 0"
-      MI.RemoveOperand(4);
-      MI.RemoveOperand(3);
+      if (MI.getOpcode() == PPC::RLWINM_rec ||
+          MI.getOpcode() == PPC::RLWINM8_rec) {
+        MI.RemoveOperand(4);
+        MI.RemoveOperand(3);
+        MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+      }
       MI.getOperand(2).setImm(0);
-      MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
-      MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+      MI.getOperand(1).setReg(ForwardReg);
       if (SrcMI->getOperand(1).isKill()) {
         MI.getOperand(1).setIsKill(true);
         SrcMI->getOperand(1).setIsKill(false);
@@ -3370,9 +3402,7 @@
     LLVM_DEBUG(dbgs() << "With: ");
     LLVM_DEBUG(MI.dump());
-
-  } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
-              NewMB <= NewME) ||
+  } else if ((isRunOfOnes(FinalMask, NewMB, NewME) && NewMB <= NewME) ||
              SrcMaskFull) {
     // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
     // than NewME. Otherwise we get a 64 bit value after folding, but MI
@@ -3383,12 +3413,17 @@
     uint16_t NewSH = (SHSrc + SHMI) % 32;
     MI.getOperand(2).setImm(NewSH);
+    if (MI.getOpcode() == PPC::ANDI_rec || MI.getOpcode() == PPC::ANDI8_rec) {
+      MI.setDesc(get(Is64Bit ? PPC::RLWINM8_rec : PPC::RLWINM_rec));
+      MI.addOperand(MachineOperand::CreateImm(NewMB));
+      MI.addOperand(MachineOperand::CreateImm(NewME));
+    }
     // If SrcMI mask is full, no need to update MBMI and MEMI.
-    if (!SrcMaskFull) {
+    else if (!SrcMaskFull) {
      MI.getOperand(3).setImm(NewMB);
       MI.getOperand(4).setImm(NewME);
     }
-    MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+    MI.getOperand(1).setReg(ForwardReg);
     if (SrcMI->getOperand(1).isKill()) {
       MI.getOperand(1).setIsKill(true);
       SrcMI->getOperand(1).setIsKill(false);
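Not part of the patch, but useful for review: the mask arithmetic above is compact, so here is a minimal standalone sketch of it in plain C++. The names maskFromMBME, rotl32, and runOfOnes are local stand-ins I introduce for APInt::getBitsSetWithWrap, APInt::rotl, and isRunOfOnes; they are not LLVM APIs. Both patterns reduce to the same computation: rotate the source mask by the second instruction's shift (zero for ANDI_rec), intersect it with the second mask, and fold when the result is a single run of ones.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// PPC numbers bits from the MSB, so ISA bits MB..ME become a 32-bit mask
// from LSB-numbered bit (31 - ME) up to bit (31 - MB), wrapping when
// MB > ME (mirroring APInt::getBitsSetWithWrap in the patch).
static uint32_t maskFromMBME(unsigned MB, unsigned ME) {
  assert(MB < 32 && ME < 32);
  unsigned Lo = 31 - ME, Hi = 31 - MB;
  if (Lo <= Hi)
    return (uint32_t)((((uint64_t)1 << (Hi - Lo + 1)) - 1) << Lo);
  // Wrapped run: all bits set except those strictly between Hi and Lo.
  return ~(uint32_t)((((uint64_t)1 << (Lo - Hi - 1)) - 1) << (Hi + 1));
}

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// Non-wrapping run-of-ones check (GCC/Clang builtins), reporting the run
// as ISA MB..ME. The fold only accepts NewMB <= NewME, so ignoring
// wrapped result masks is enough for this sketch.
static bool runOfOnes(uint32_t Mask, unsigned &MB, unsigned &ME) {
  if (Mask == 0)
    return false;
  unsigned Lo = __builtin_ctz(Mask), Hi = 31 - __builtin_clz(Mask);
  if (Mask != maskFromMBME(31 - Hi, 31 - Lo))
    return false; // holes inside the run
  MB = 31 - Hi;
  ME = 31 - Lo;
  return true;
}

int main() {
  // RLWINM(SH=8, MB=28, ME=31) feeding "ANDI. reg, 4", as in the new MIR
  // test below: treat the ANDI as an RLWINM with SH = 0 and mask AndImm.
  uint32_t MaskSrc = maskFromMBME(28, 31); // 0x0000000F
  uint32_t FinalMask = rotl32(MaskSrc, /*SHMI=*/0) & 4u;
  unsigned NewMB, NewME;
  if (runOfOnes(FinalMask, NewMB, NewME))
    printf("RLWINM_rec reg, %u, %u, %u\n", (8u + 0u) % 32u, NewMB, NewME);
  // Prints "RLWINM_rec reg, 8, 29, 29", matching the updated CHECK line.
  return 0;
}
```

Under this model the SrcMaskFull special case (MBSrc - MESrc == 1, or MBSrc == 0 with MESrc == 31) is just a source mask of all ones, which is why the fold then only has to update the shift amount. The post-RA path additionally gates everything on isRegElgibleForForwarding, so the arithmetic only runs when the source register can legally be forwarded to MI.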
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -844,6 +844,8 @@
       combineSEXTAndSHL(MI, ToErase);
       break;
     }
+    case PPC::ANDI_rec:
+    case PPC::ANDI8_rec:
     case PPC::RLWINM:
     case PPC::RLWINM_rec:
     case PPC::RLWINM8:
diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
--- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
@@ -161,3 +161,65 @@
     dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
     BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
 ...
+---
+name: testFoldRLWINMAndANDI
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMAndANDI
+    ; CHECK: liveins: $r3
+    ; CHECK: dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 29, 29, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 8, 28, 31
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testFoldRLWINMAndANDIToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMAndANDIToZero
+    ; CHECK: liveins: $r3
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 4, 28, 31
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 16, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testRLWINMANDIInvalidMask
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testRLWINMANDIInvalidMask
+    ; CHECK: liveins: $r3
+    ; CHECK: $r3 = RLWINM killed $r3, 4, 20, 31
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 9, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 4, 20, 31
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 9, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testCanNotFoldRLWINMAndANDI
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testCanNotFoldRLWINMAndANDI
+    ; CHECK: liveins: $r2, $r3, $x2
+    ; CHECK: STD $x2, -8, $x1 :: (store 8 into %stack.0)
+    ; CHECK: $r3 = RLWINM killed $r2, 4, 28, 31
+    ; CHECK: $r2 = LI 0, implicit-def $x2
+    ; CHECK: $x2 = LD -8, $x1 :: (load 8 from %stack.0)
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0, implicit $x2
+    $r3 = RLWINM killed $r2, 4, 28, 31
+    $r2 = LI 0, implicit-def $x2
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0, implicit killed $x2
+...
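The four new MIR tests above exercise each outcome of the fold. A quick arithmetic check of the three mask cases (a standalone sketch, not LLVM code; the masks are written out directly):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // testFoldRLWINMAndANDI: RLWINM(8, 28, 31) keeps 0x0000000F after its
  // rotate; 0xF & 4 == 0b100, a single run of ones, so the pair folds to
  // RLWINM_rec(8, 29, 29).
  assert((0x0000000Fu & 4u) == 4u);
  // testFoldRLWINMAndANDIToZero: 0xF & 16 == 0, so MI is rewritten as
  // "ANDI_rec reg, 0", which keeps the implicit-def of $cr0 alive.
  assert((0x0000000Fu & 16u) == 0u);
  // testRLWINMANDIInvalidMask: RLWINM(4, 20, 31) keeps 0x00000FFF, and
  // 0xFFF & 9 == 0b1001: neither zero nor one contiguous run, so the
  // fold is rejected and both instructions stay.
  assert((0x00000FFFu & 9u) == 9u);
  return 0;
}
```

testCanNotFoldRLWINMAndANDI fails for a different reason: its masks match the foldable first test, but $r2, the register to forward, is clobbered by the intervening LI, so the new isRegElgibleForForwarding check rejects the transformation.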
diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
--- a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
@@ -192,8 +192,7 @@
     ; CHECK: liveins: $x3
     ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x3
     ; CHECK: [[COPY1:%[0-9]+]]:gprc = COPY [[COPY]].sub_32
-    ; CHECK: [[RLWINM:%[0-9]+]]:gprc = RLWINM [[COPY1]], 4, 28, 31
-    ; CHECK: [[ANDI_rec:%[0-9]+]]:gprc = ANDI_rec [[RLWINM]], 4, implicit-def $cr0
+    ; CHECK: [[RLWINM_rec:%[0-9]+]]:gprc = RLWINM_rec [[COPY1]], 4, 29, 29, implicit-def $cr0
     ; CHECK: BLR8 implicit $lr8, implicit $rm
     %0:g8rc = COPY $x3
     %1:gprc = COPY %0.sub_32:g8rc
@@ -201,3 +200,21 @@
     %3:gprc = ANDI_rec %2:gprc, 4, implicit-def $cr0
     BLR8 implicit $lr8, implicit $rm
 ...
+---
+name: testFoldRLWINMAndANDIToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: testFoldRLWINMAndANDIToZero
+    ; CHECK: liveins: $x3
+    ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x3
+    ; CHECK: [[COPY1:%[0-9]+]]:gprc = COPY [[COPY]].sub_32
+    ; CHECK: [[ANDI_rec:%[0-9]+]]:gprc = ANDI_rec [[COPY1]], 0, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 4, 28, 31
+    %3:gprc = ANDI_rec %2:gprc, 32, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm
+...
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -35,12 +35,10 @@
 ; CHECK-NEXT:    std r29, 56(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    paddi r29, 0, .LJTI0_0@PCREL, 1
-; CHECK-NEXT:    srwi r4, r3, 4
-; CHECK-NEXT:    srwi r3, r3, 5
-; CHECK-NEXT:    andi. r4, r4, 1
+; CHECK-NEXT:    rlwinm. r4, r3, 28, 31, 31
 ; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    crmove 4*cr4+lt, gt
-; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    rlwinm. r3, r3, 27, 31, 31
 ; CHECK-NEXT:    setnbc r3, gt
 ; CHECK-NEXT:    stw r3, 52(r1)
 ; CHECK-NEXT:    cmplwi cr3, r3, 336
@@ -229,12 +227,10 @@
 ; CHECK-BE-NEXT:    lwz r3, 0(r3)
 ; CHECK-BE-NEXT:    std r29, 136(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    std r30, 144(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    srwi r4, r3, 4
-; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    andi. r4, r4, 1
+; CHECK-BE-NEXT:    rlwinm. r4, r3, 28, 31, 31
 ; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    crmove 4*cr4+lt, gt
-; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    rlwinm. r3, r3, 27, 31, 31
 ; CHECK-BE-NEXT:    setnbc r3, gt
 ; CHECK-BE-NEXT:    stw r3, 132(r1)
 ; CHECK-BE-NEXT:    cmplwi cr3, r3, 336
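The CHECK-line updates in p10-spill-crgt.ll, and in the remaining .ll tests below, all follow from one identity: srwi rD, rS, n is rlwinm rD, rS, 32 - n, n, 31, and folding a trailing "andi. rD, rD, 1" narrows the mask to bit 31, yielding "rlwinm. rD, rS, 32 - n, 31, 31". A small self-check of that identity (standalone C++, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  // ISA MB = ME = 31 is the least-significant bit, i.e. mask 0x1.
  for (uint32_t X : {0u, 1u, 0x30u, 0xA0u, 0xDEADBEEFu, ~0u}) {
    for (unsigned N : {4u, 5u, 7u}) { // the shift amounts in these tests
      uint32_t SrwiAndi = (X >> N) & 1u;             // srwi N; andi. 1
      uint32_t RlwinmDot = rotl32(X, 32 - N) & 0x1u; // rlwinm. 32-N, 31, 31
      assert(SrwiAndi == RlwinmDot);
    }
  }
  return 0;
}
```

Hence srwi 4 / andi. 1 becomes rlwinm. 28, 31, 31 and srwi 5 / andi. 1 becomes rlwinm. 27, 31, 31, saving one instruction on each path.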
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -15,6 +15,10 @@
 ; bit of any CR field is spilled. We need to test the spilling of a CR bit
 ; other than the LT bit. Hence this test case is rather complex.
 
+; FIXME: A redundant COPY was generated during RA.
+; i.e. rlwinm r29, r30, 0, 24, 22
+;      mr r30, r29
+
 %0 = type { %1 }
 %1 = type { %0*, %0*, %0*, i32 }
@@ -34,10 +38,12 @@
 ; CHECK-NEXT:    stdu r1, -80(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    .cfi_offset cr2, 8
 ; CHECK-NEXT:    .cfi_offset cr3, 8
 ; CHECK-NEXT:    .cfi_offset cr4, 8
+; CHECK-NEXT:    std r29, 56(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    bl call_2@notoc
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_13
@@ -48,7 +54,7 @@
 ; CHECK-NEXT:    # implicit-def: $r30
 ; CHECK-NEXT:    crnot 4*cr5+lt, 4*cr3+eq
 ; CHECK-NEXT:    setnbc r3, 4*cr5+lt
-; CHECK-NEXT:    stw r3, 60(r1)
+; CHECK-NEXT:    stw r3, 52(r1)
 ; CHECK-NEXT:    lwz r3, 0(r3)
 ; CHECK-NEXT:    cmpwi cr4, r3, 0
 ; CHECK-NEXT:    .p2align 4
@@ -68,16 +74,17 @@
 ; CHECK-NEXT:    bc 12, 4*cr3+eq, .LBB0_9
 ; CHECK-NEXT:  # %bb.6: # %bb32
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    rlwinm r30, r30, 0, 24, 22
-; CHECK-NEXT:    andi. r3, r30, 2
+; CHECK-NEXT:    rlwinm. r3, r30, 0, 30, 30
+; CHECK-NEXT:    rlwinm r29, r30, 0, 24, 22
 ; CHECK-NEXT:    mcrf cr2, cr0
 ; CHECK-NEXT:    bl call_4@notoc
+; CHECK-NEXT:    mr r30, r29
 ; CHECK-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-NEXT:  # %bb.7: # %bb37
 ; CHECK-NEXT:  .LBB0_8: # %bb22
 ; CHECK-NEXT:  .LBB0_9: # %bb35
 ; CHECK-NEXT:  .LBB0_10: # %bb27
-; CHECK-NEXT:    lwz r4, 60(r1)
+; CHECK-NEXT:    lwz r4, 52(r1)
 ; CHECK-NEXT:    # implicit-def: $cr5lt
 ; CHECK-NEXT:    mfocrf r3, 4
 ; CHECK-NEXT:    rlwimi r3, r4, 12, 20, 20
@@ -94,16 +101,18 @@
 ; CHECK-BE-NEXT:    mfcr r12
 ; CHECK-BE-NEXT:    std r0, 16(r1)
 ; CHECK-BE-NEXT:    stw r12, 8(r1)
-; CHECK-BE-NEXT:    stdu r1, -160(r1)
-; CHECK-BE-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-BE-NEXT:    stdu r1, -176(r1)
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-NEXT:    .cfi_offset r28, -32
 ; CHECK-BE-NEXT:    .cfi_offset r29, -24
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
-; CHECK-BE-NEXT:    std r29, 136(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, 144(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    bl call_2
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:    bc 12, 4*cr5+lt, .LBB0_13
@@ -115,7 +124,7 @@
 ; CHECK-BE-NEXT:    # implicit-def: $r29
 ; CHECK-BE-NEXT:    crnot 4*cr5+lt, 4*cr3+eq
 ; CHECK-BE-NEXT:    setnbc r3, 4*cr5+lt
-; CHECK-BE-NEXT:    stw r3, 132(r1)
+; CHECK-BE-NEXT:    stw r3, 140(r1)
 ; CHECK-BE-NEXT:    lwz r3, 0(r3)
 ; CHECK-BE-NEXT:    cmpwi cr4, r3, 0
 ; CHECK-BE-NEXT:    .p2align 4
@@ -136,17 +145,18 @@
 ; CHECK-BE-NEXT:    bc 12, 4*cr3+eq, .LBB0_9
 ; CHECK-BE-NEXT:  # %bb.6: # %bb32
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 22
-; CHECK-BE-NEXT:    andi. r3, r29, 2
+; CHECK-BE-NEXT:    rlwinm. r3, r29, 0, 30, 30
+; CHECK-BE-NEXT:    rlwinm r28, r29, 0, 24, 22
 ; CHECK-BE-NEXT:    mcrf cr2, cr0
 ; CHECK-BE-NEXT:    bl call_4
 ; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mr r29, r28
 ; CHECK-BE-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.7: # %bb37
 ; CHECK-BE-NEXT:  .LBB0_8: # %bb22
 ; CHECK-BE-NEXT:  .LBB0_9: # %bb35
 ; CHECK-BE-NEXT:  .LBB0_10: # %bb27
-; CHECK-BE-NEXT:    lwz r4, 132(r1)
+; CHECK-BE-NEXT:    lwz r4, 140(r1)
 ; CHECK-BE-NEXT:    # implicit-def: $cr5lt
 ; CHECK-BE-NEXT:    mfocrf r3, 4
 ; CHECK-BE-NEXT:    rlwimi r3, r4, 12, 20, 20
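The p10-spill-crlt.ll update is interesting because the feeding rlwinm has MB > ME, i.e. a wrapped source mask, which APInt::getBitsSetWithWrap models directly. A quick check of that case (standalone sketch, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // "rlwinm rX, r30, 0, 24, 22" has MB=24 > ME=22, so its mask wraps:
  // every bit survives except ISA bit 23, i.e. LSB-numbered bit 8.
  uint32_t WrappedMask = ~(1u << 8);
  // Folding the following "andi. r3, rX, 2" intersects the two masks:
  uint32_t FinalMask = WrappedMask & 2u;
  assert(FinalMask == 2u); // one run at ISA bit 30 -> rlwinm. 0, 30, 30
  return 0;
}
```

The rlwinm r29 / mr r30 pair that remains in the CHECK lines is the redundant COPY called out in the FIXME above, not a limitation of this fold.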
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -66,8 +66,7 @@
 ; CHECK-NEXT:    crnot 4*cr2+eq, eq
 ; CHECK-NEXT:    bl call_2@notoc
 ; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    srwi r3, r28, 4
-; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    rlwinm. r3, r28, 28, 31, 31
 ; CHECK-NEXT:    crmove 4*cr2+gt, gt
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb9
@@ -75,8 +74,7 @@
 ; CHECK-NEXT:    mr r4, r30
 ; CHECK-NEXT:    bl call_3@notoc
 ; CHECK-NEXT:  .LBB0_2: # %bb12
-; CHECK-NEXT:    srwi r3, r28, 7
-; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    rlwinm. r3, r28, 25, 31, 31
 ; CHECK-NEXT:    crmove 4*cr2+un, gt
 ; CHECK-NEXT:    bc 12, 4*cr2+eq, .LBB0_7
 ; CHECK-NEXT:  # %bb.3: # %bb37
@@ -214,8 +212,7 @@
 ; CHECK-BE-NEXT:    bl call_2
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:    mr r27, r3
-; CHECK-BE-NEXT:    srwi r3, r28, 4
-; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    rlwinm. r3, r28, 28, 31, 31
 ; CHECK-BE-NEXT:    crmove 4*cr2+gt, gt
 ; CHECK-BE-NEXT:    bc 12, 4*cr5+lt, .LBB0_2
 ; CHECK-BE-NEXT:  # %bb.1: # %bb9
@@ -224,8 +221,7 @@
 ; CHECK-BE-NEXT:    mr r4, r30
 ; CHECK-BE-NEXT:    bl call_3
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:  .LBB0_2: # %bb12
-; CHECK-BE-NEXT:    srwi r3, r28, 7
-; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    rlwinm. r3, r28, 25, 31, 31
 ; CHECK-BE-NEXT:    crmove 4*cr2+un, gt
 ; CHECK-BE-NEXT:    bc 12, 4*cr2+eq, .LBB0_7
 ; CHECK-BE-NEXT:  # %bb.3: # %bb37
diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
--- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
@@ -113,8 +113,7 @@
 ; CHECK-NEXT:    xvtdivdp cr0, v2, v3
 ; CHECK-NEXT:    li r4, 222
 ; CHECK-NEXT:    mfocrf r3, 128
-; CHECK-NEXT:    srwi r3, r3, 28
-; CHECK-NEXT:    andi. r3, r3, 2
+; CHECK-NEXT:    rlwinm. r3, r3, 4, 30, 30
 ; CHECK-NEXT:    li r3, 22
 ; CHECK-NEXT:    iseleq r3, r4, r3
 ; CHECK-NEXT:    blr
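The vsx_builtins.ll change is the same fold with an andi. mask of 2 instead of 1: the surviving bit is ISA bit 30 (LSB bit 1), so "srwi r3, r3, 28" plus "andi. r3, r3, 2" becomes "rlwinm. r3, r3, 4, 30, 30". One last self-check (standalone; the sample values are arbitrary stand-ins for the mfocrf result):

```cpp
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  // Both sequences extract bit 29 of r3 and leave it at LSB bit 1.
  for (uint32_t X : {0u, 0x20000000u, 0x12345678u, 0xFFFFFFFFu}) {
    uint32_t SrwiAndi = (X >> 28) & 2u;     // srwi 28; andi. 2
    uint32_t RlwinmDot = rotl32(X, 4) & 2u; // rlwinm. 4, 30, 30
    assert(SrwiAndi == RlwinmDot);
  }
  return 0;
}
```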