diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3231,18 +3231,20 @@
   return false;
 }
 
-// This function tries to combine two RLWINMs. We not only perform such
-// optimization in SSA, but also after RA, since some RLWINM is generated after
-// RA.
+// This function tries to combine an RLWINM with a following RLWINM or
+// ANDI_rec. We perform this optimization not only in SSA form, but also
+// after RA, since some RLWINMs are generated post-RA.
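+//
+// For example (taken from the post-RA MIR tests in this patch):
+//   $r3 = RLWINM killed $r3, 4, 28, 31
+//   dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+// is folded into:
+//   dead renamable $r3 = RLWINM_rec killed $r3, 4, 29, 29, implicit-def $cr0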
 bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI,
                                               MachineInstr *&ToErase) const {
   bool Is64Bit = false;
   switch (MI.getOpcode()) {
   case PPC::RLWINM:
   case PPC::RLWINM_rec:
+  case PPC::ANDI_rec:
     break;
   case PPC::RLWINM8:
   case PPC::RLWINM8_rec:
+  case PPC::ANDI8_rec:
     Is64Bit = true;
     break;
   default:
@@ -3279,125 +3281,190 @@
   default:
     return false;
   }
+  MachineOperand ForwardRegOp = SrcMI->getOperand(1);
+  Register ForwardReg = ForwardRegOp.getReg();
   if (MRI->isSSA()) {
     CanErase = !SrcMI->hasImplicitDef() && MRI->hasOneNonDBGUse(FoldingReg);
   } else {
-    CanErase = !OtherIntermediateUse && MI.getOperand(1).isKill() &&
-               !SrcMI->hasImplicitDef();
+    bool KillFwdDefMI = !OtherIntermediateUse && MI.getOperand(1).isKill();
+    CanErase = KillFwdDefMI && !SrcMI->hasImplicitDef();
     // In post-RA, if SrcMI also defines the register to be forwarded, we can
     // only do the folding if SrcMI is going to be erased.
-    if (!CanErase && SrcMI->definesRegister(SrcMI->getOperand(1).getReg()))
+    if (!CanErase && SrcMI->definesRegister(ForwardReg))
+      return false;
+    bool IsFwdFeederRegKilled = false;
+    // Check if ForwardReg can be forwarded to MI.
+    if (!isRegElgibleForForwarding(ForwardRegOp, *SrcMI, MI, KillFwdDefMI,
+                                   IsFwdFeederRegKilled))
       return false;
   }
-  assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
-          MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
-          SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+  assert((SrcMI->getOperand(2).isImm() && SrcMI->getOperand(3).isImm() &&
+          SrcMI->getOperand(4).isImm()) &&
          "Invalid PPC::RLWINM Instruction!");
   uint64_t SHSrc = SrcMI->getOperand(2).getImm();
-  uint64_t SHMI = MI.getOperand(2).getImm();
   uint64_t MBSrc = SrcMI->getOperand(3).getImm();
-  uint64_t MBMI = MI.getOperand(3).getImm();
   uint64_t MESrc = SrcMI->getOperand(4).getImm();
-  uint64_t MEMI = MI.getOperand(4).getImm();
-  assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
-         "Invalid PPC::RLWINM Instruction!");
-  // If MBMI is bigger than MEMI, we always can not get run of ones.
-  // RotatedSrcMask non-wrap:
-  //                 0........31|32........63
-  // RotatedSrcMask:   B---E        B---E
-  // MaskMI:         -----------|--E  B------
-  // Result:           -----          ---      (Bad candidate)
-  //
-  // RotatedSrcMask wrap:
-  //                 0........31|32........63
-  // RotatedSrcMask: --E   B----|--E    B----
-  // MaskMI:         -----------|--E  B------
-  // Result:         ---   -----|---    -----  (Bad candidate)
-  //
-  // One special case is RotatedSrcMask is a full set mask.
-  // RotatedSrcMask full:
-  //                 0........31|32........63
-  // RotatedSrcMask: ------EB---|-------EB---
-  // MaskMI:         -----------|--E  B------
-  // Result:         -----------|---  -------  (Good candidate)
-
-  // Mark special case.
-  bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
-
-  // For other MBMI > MEMI cases, just return.
-  if ((MBMI > MEMI) && !SrcMaskFull)
-    return false;
-
-  // Handle MBMI <= MEMI cases.
-  APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
-  // In MI, we only need low 32 bits of SrcMI, just consider about low 32
-  // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
-  // while in PowerPC ISA, lowerest bit is at index 63.
+  assert((MESrc < 32 && MBSrc < 32) && "Invalid PPC::RLWINM Instruction!");
+  // Note that in APInt the lowest bit is at index 0, while in the PowerPC
+  // ISA the lowest bit is at index 63.
   APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
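+  // e.g. MBSrc = 28, MESrc = 31 yields getBitsSetWithWrap(32, 0, 4), i.e.
+  // MaskSrc == 0x0000000F.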
-
-  APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
-  APInt FinalMask = RotatedSrcMask & MaskMI;
-  uint32_t NewMB, NewME;
   bool Simplified = false;
+  uint32_t NewMB, NewME;
+  // Combine RLWINM + RLWINM.
+  if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM_rec ||
+      MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec) {
+    assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+            MI.getOperand(4).isImm()) &&
+           "Invalid PPC::RLWINM Instruction!");
+    uint64_t SHMI = MI.getOperand(2).getImm();
+    uint64_t MBMI = MI.getOperand(3).getImm();
+    uint64_t MEMI = MI.getOperand(4).getImm();
+    assert((MEMI < 32 && MBMI < 32) && "Invalid PPC::RLWINM Instruction!");
+    // If MBMI is bigger than MEMI, we can never get a run of ones.
+    // RotatedSrcMask non-wrap:
+    //                 0........31|32........63
+    // RotatedSrcMask:   B---E        B---E
+    // MaskMI:         -----------|--E  B------
+    // Result:           -----          ---      (Bad candidate)
+    //
+    // RotatedSrcMask wrap:
+    //                 0........31|32........63
+    // RotatedSrcMask: --E   B----|--E    B----
+    // MaskMI:         -----------|--E  B------
+    // Result:         ---   -----|---    -----  (Bad candidate)
+    //
+    // One special case is RotatedSrcMask is a full set mask.
+    // RotatedSrcMask full:
+    //                 0........31|32........63
+    // RotatedSrcMask: ------EB---|-------EB---
+    // MaskMI:         -----------|--E  B------
+    // Result:         -----------|---  -------  (Good candidate)
+
+    // Mark special case.
+    bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+    // For other MBMI > MEMI cases, just return.
+    if ((MBMI > MEMI) && !SrcMaskFull)
+      return false;
 
-  // If final mask is 0, MI result should be 0 too.
-  if (FinalMask.isNullValue()) {
-    Simplified = true;
-    LLVM_DEBUG(dbgs() << "Replace Instr: ");
-    LLVM_DEBUG(MI.dump());
+    // Handle MBMI <= MEMI cases.
+    APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+    // MI only needs the low 32 bits of SrcMI's result, so just consider the
+    // low 32 bits of SrcMI's mask.
+    APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+    APInt FinalMask = RotatedSrcMask & MaskMI;
+
+    // If final mask is 0, MI result should be 0 too.
+    if (FinalMask.isNullValue()) {
+      Simplified = true;
+      LLVM_DEBUG(dbgs() << "Replace Instr: ");
+      LLVM_DEBUG(MI.dump());
+
+      if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+        // Replace MI with "LI 0"
+        MI.RemoveOperand(4);
+        MI.RemoveOperand(3);
+        MI.RemoveOperand(2);
+        MI.getOperand(1).ChangeToImmediate(0);
+        MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
+      } else {
+        // Replace MI with "ANDI_rec reg, 0"
+        MI.RemoveOperand(4);
+        MI.RemoveOperand(3);
+        MI.getOperand(2).setImm(0);
+        MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+        MI.getOperand(1).setReg(ForwardReg);
+        if (SrcMI->getOperand(1).isKill()) {
+          MI.getOperand(1).setIsKill(true);
+          SrcMI->getOperand(1).setIsKill(false);
+        } else
+          // About to replace MI.getOperand(1), clear its kill flag.
+          MI.getOperand(1).setIsKill(false);
+      }
 
-    if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
-      // Replace MI with "LI 0"
-      MI.RemoveOperand(4);
-      MI.RemoveOperand(3);
-      MI.RemoveOperand(2);
-      MI.getOperand(1).ChangeToImmediate(0);
-      MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
-    } else {
-      // Replace MI with "ANDI_rec reg, 0"
-      MI.RemoveOperand(4);
-      MI.RemoveOperand(3);
-      MI.getOperand(2).setImm(0);
-      MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
-      MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+      LLVM_DEBUG(dbgs() << "With: ");
+      LLVM_DEBUG(MI.dump());
+
+    } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+                            NewME) &&
+                NewMB <= NewME) ||
+               SrcMaskFull) {
+      // Here we only handle the MBMI <= MEMI case, so NewMB must be no
+      // bigger than NewME. Otherwise we would get a 64-bit value after
+      // folding, but MI returns a 32-bit value.
+      Simplified = true;
+      LLVM_DEBUG(dbgs() << "Converting Instr: ");
+      LLVM_DEBUG(MI.dump());
+
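+      // The two left-rotations compose additively:
+      // rotl32(rotl32(x, SHSrc), SHMI) == rotl32(x, (SHSrc + SHMI) % 32).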
+      uint16_t NewSH = (SHSrc + SHMI) % 32;
+      MI.getOperand(2).setImm(NewSH);
+      // If SrcMI mask is full, no need to update MBMI and MEMI.
+      if (!SrcMaskFull) {
+        MI.getOperand(3).setImm(NewMB);
+        MI.getOperand(4).setImm(NewME);
+      }
+      MI.getOperand(1).setReg(ForwardReg);
       if (SrcMI->getOperand(1).isKill()) {
         MI.getOperand(1).setIsKill(true);
         SrcMI->getOperand(1).setIsKill(false);
       } else
         // About to replace MI.getOperand(1), clear its kill flag.
         MI.getOperand(1).setIsKill(false);
-    }
 
-    LLVM_DEBUG(dbgs() << "With: ");
-    LLVM_DEBUG(MI.dump());
+      LLVM_DEBUG(dbgs() << "To: ");
+      LLVM_DEBUG(MI.dump());
+    }
+  }
+  // Combine RLWINM + ANDI_rec.
+// We can treat ANDI_rec as RLWINM_rec with SH = 0.
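+// e.g. ANDI_rec rD, rS, 4 is equivalent to RLWINM_rec rD, rS, 0, 29, 29,
+// since the 16-bit immediate 4 selects ISA bit 29 of the low 32 bits.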
+  else if (MI.getOpcode() == PPC::ANDI_rec ||
+           MI.getOpcode() == PPC::ANDI8_rec) {
+    assert(MI.getOperand(2).isImm() && "Invalid PPC::ANDI_rec Instruction!");
+    uint64_t MIImm = MI.getOperand(2).getImm();
+    assert(isUIntN(16, MIImm) && "Invalid PPC::ANDI_rec Instruction!");
+    uint64_t FinalMask = MaskSrc.getZExtValue() & MIImm;
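+    // e.g. MaskSrc == 0xF (from RLWINM ..., 4, 28, 31) and MIImm == 4 give
+    // FinalMask == 0x4, a run of ones with NewMB == NewME == 29.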
+
+    // If final mask is 0, MI result should be 0 too.
+    if (FinalMask == 0) {
+      Simplified = true;
+      LLVM_DEBUG(dbgs() << "Combining pair: ");
+      LLVM_DEBUG(SrcMI->dump());
+      LLVM_DEBUG(MI.dump());
 
-  } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
-              NewMB <= NewME) ||
-             SrcMaskFull) {
-    // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
-    // than NewME. Otherwise we get a 64 bit value after folding, but MI
-    // return a 32 bit value.
-    Simplified = true;
-    LLVM_DEBUG(dbgs() << "Converting Instr: ");
-    LLVM_DEBUG(MI.dump());
+      MI.getOperand(2).setImm(0);
+      MI.getOperand(1).setReg(ForwardReg);
+      if (SrcMI->getOperand(1).isKill()) {
+        MI.getOperand(1).setIsKill(true);
+        SrcMI->getOperand(1).setIsKill(false);
+      } else {
+        // About to replace MI.getOperand(1), clear its kill flag.
+        MI.getOperand(1).setIsKill(false);
+      }
+      LLVM_DEBUG(dbgs() << "TO: ");
+      LLVM_DEBUG(MI.dump());
+    } else if (isRunOfOnes((unsigned)FinalMask, NewMB, NewME) &&
+               NewMB <= NewME) {
+      Simplified = true;
+      LLVM_DEBUG(dbgs() << "Combining pair: ");
+      LLVM_DEBUG(SrcMI->dump());
+      LLVM_DEBUG(MI.dump());
 
-    uint16_t NewSH = (SHSrc + SHMI) % 32;
-    MI.getOperand(2).setImm(NewSH);
-    // If SrcMI mask is full, no need to update MBMI and MEMI.
-    if (!SrcMaskFull) {
-      MI.getOperand(3).setImm(NewMB);
-      MI.getOperand(4).setImm(NewME);
+      MI.RemoveOperand(2);
+      MI.RemoveOperand(1);
+      MI.setDesc(get(Is64Bit ? PPC::RLWINM8_rec : PPC::RLWINM_rec));
+      MI.addOperand(SrcMI->getOperand(1));
+      MI.addOperand(MachineOperand::CreateImm(SHSrc));
+      MI.addOperand(MachineOperand::CreateImm(NewMB));
+      MI.addOperand(MachineOperand::CreateImm(NewME));
+      if (SrcMI->getOperand(1).isKill()) {
+        MI.getOperand(1).setIsKill(true);
+        SrcMI->getOperand(1).setIsKill(false);
+      } else {
+        // About to replace MI.getOperand(1), clear its kill flag.
+        MI.getOperand(1).setIsKill(false);
+      }
+      LLVM_DEBUG(dbgs() << "TO: ");
+      LLVM_DEBUG(MI.dump());
     }
-    MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
-    if (SrcMI->getOperand(1).isKill()) {
-      MI.getOperand(1).setIsKill(true);
-      SrcMI->getOperand(1).setIsKill(false);
-    } else
-      // About to replace MI.getOperand(1), clear its kill flag.
-      MI.getOperand(1).setIsKill(false);
-
-    LLVM_DEBUG(dbgs() << "To: ");
-    LLVM_DEBUG(MI.dump());
   }
   if (Simplified && CanErase) {
     // If SrcMI has no implicit def, and FoldingReg has no non-debug use or
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -844,6 +844,8 @@
                       combineSEXTAndSHL(MI, ToErase);
         break;
       }
+      case PPC::ANDI_rec:
+      case PPC::ANDI8_rec:
       case PPC::RLWINM:
       case PPC::RLWINM_rec:
       case PPC::RLWINM8:
diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
--- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
@@ -161,3 +161,65 @@
     dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
     BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
 ...
+---
+name: testFoldRLWINMAndANDI
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMAndANDI
+    ; CHECK: liveins: $r3
+    ; CHECK: dead renamable $r3 = RLWINM_rec killed $r3, 4, 29, 29, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 4, 28, 31
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testFoldRLWINMAndANDIToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMAndANDIToZero
+    ; CHECK: liveins: $r3
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 4, 28, 31
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 16, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testRLWINMANDIInvalidMask
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testRLWINMANDIInvalidMask
+    ; CHECK: liveins: $r3
+    ; CHECK: $r3 = RLWINM killed $r3, 4, 20, 31
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 9, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 4, 20, 31
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 9, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testCanNotFoldRLWINMAndANDI
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testCanNotFoldRLWINMAndANDI
+    ; CHECK: liveins: $r2, $r3, $x2
+    ; CHECK: STD $x2, -8, $x1 :: (store 8 into %stack.0)
+    ; CHECK: $r3 = RLWINM killed $r2, 4, 28, 31
+    ; CHECK: $r2 = LI 0, implicit-def $x2
+    ; CHECK: $x2 = LD -8, $x1 :: (load 8 from %stack.0)
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0, implicit $x2
+    $r3 = RLWINM killed $r2, 4, 28, 31
+    $r2 = LI 0, implicit-def $x2
+    dead renamable $r3 = ANDI_rec killed renamable $r3, 4, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0, implicit killed $x2
+...
diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
--- a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
@@ -192,8 +192,7 @@
     ; CHECK: liveins: $x3
     ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x3
     ; CHECK: [[COPY1:%[0-9]+]]:gprc = COPY [[COPY]].sub_32
-    ; CHECK: [[RLWINM:%[0-9]+]]:gprc = RLWINM [[COPY1]], 4, 28, 31
-    ; CHECK: [[ANDI_rec:%[0-9]+]]:gprc = ANDI_rec [[RLWINM]], 4, implicit-def $cr0
+    ; CHECK: [[RLWINM_rec:%[0-9]+]]:gprc = RLWINM_rec [[COPY1]], 4, 29, 29, implicit-def $cr0
     ; CHECK: BLR8 implicit $lr8, implicit $rm
     %0:g8rc = COPY $x3
     %1:gprc = COPY %0.sub_32:g8rc
@@ -201,3 +200,21 @@
     %3:gprc = ANDI_rec %2:gprc, 4, implicit-def $cr0
     BLR8 implicit $lr8, implicit $rm
 ...
+---
+name: testFoldRLWINMAndANDIToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: testFoldRLWINMAndANDIToZero
+    ; CHECK: liveins: $x3
+    ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x3
+    ; CHECK: [[COPY1:%[0-9]+]]:gprc = COPY [[COPY]].sub_32
+    ; CHECK: [[ANDI_rec:%[0-9]+]]:gprc = ANDI_rec [[COPY1]], 0, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 4, 28, 31
+    %3:gprc = ANDI_rec %2:gprc, 32, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm
+...
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -35,12 +35,10 @@
 ; CHECK-NEXT:    std r29, 56(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    paddi r29, 0, .LJTI0_0@PCREL, 1
-; CHECK-NEXT:    srwi r4, r3, 4
-; CHECK-NEXT:    srwi r3, r3, 5
-; CHECK-NEXT:    andi. r4, r4, 1
+; CHECK-NEXT:    rlwinm. r4, r3, 28, 31, 31
 ; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    crmove 4*cr4+lt, gt
-; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    rlwinm. r3, r3, 27, 31, 31
 ; CHECK-NEXT:    setnbc r3, gt
 ; CHECK-NEXT:    stw r3, 52(r1)
 ; CHECK-NEXT:    cmplwi cr3, r3, 336
@@ -229,12 +227,10 @@
 ; CHECK-BE-NEXT:    lwz r3, 0(r3)
 ; CHECK-BE-NEXT:    std r29, 136(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    std r30, 144(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    srwi r4, r3, 4
-; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    andi. r4, r4, 1
+; CHECK-BE-NEXT:    rlwinm. r4, r3, 28, 31, 31
 ; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    crmove 4*cr4+lt, gt
-; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    rlwinm. r3, r3, 27, 31, 31
 ; CHECK-BE-NEXT:    setnbc r3, gt
 ; CHECK-BE-NEXT:    stw r3, 132(r1)
 ; CHECK-BE-NEXT:    cmplwi cr3, r3, 336
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -15,6 +15,10 @@
 ; bit of any CR field is spilled. We need to test the spilling of a CR bit
 ; other than the LT bit. Hence this test case is rather complex.
 
+; FIXME: A redundant COPY was generated during RA.
+; i.e.   rlwinm r29, r30, 0, 24, 22
+;        mr r30, r29
+
 %0 = type { %1 }
 %1 = type { %0*, %0*, %0*, i32 }
 
@@ -34,10 +38,12 @@
 ; CHECK-NEXT:    stdu r1, -80(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    .cfi_offset cr2, 8
 ; CHECK-NEXT:    .cfi_offset cr3, 8
 ; CHECK-NEXT:    .cfi_offset cr4, 8
+; CHECK-NEXT:    std r29, 56(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    bl call_2@notoc
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_13
@@ -48,7 +54,7 @@
 ; CHECK-NEXT:    # implicit-def: $r30
 ; CHECK-NEXT:    crnot 4*cr5+lt, 4*cr3+eq
 ; CHECK-NEXT:    setnbc r3, 4*cr5+lt
-; CHECK-NEXT:    stw r3, 60(r1)
+; CHECK-NEXT:    stw r3, 52(r1)
 ; CHECK-NEXT:    lwz r3, 0(r3)
 ; CHECK-NEXT:    cmpwi cr4, r3, 0
 ; CHECK-NEXT:    .p2align 4
@@ -68,16 +74,17 @@
 ; CHECK-NEXT:    bc 12, 4*cr3+eq, .LBB0_9
 ; CHECK-NEXT:  # %bb.6: # %bb32
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    rlwinm r30, r30, 0, 24, 22
-; CHECK-NEXT:    andi. r3, r30, 2
+; CHECK-NEXT:    rlwinm. r3, r30, 0, 30, 30
+; CHECK-NEXT:    rlwinm r29, r30, 0, 24, 22
 ; CHECK-NEXT:    mcrf cr2, cr0
 ; CHECK-NEXT:    bl call_4@notoc
+; CHECK-NEXT:    mr r30, r29
 ; CHECK-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-NEXT:  # %bb.7: # %bb37
 ; CHECK-NEXT:  .LBB0_8: # %bb22
 ; CHECK-NEXT:  .LBB0_9: # %bb35
 ; CHECK-NEXT:  .LBB0_10: # %bb27
-; CHECK-NEXT:    lwz r4, 60(r1)
+; CHECK-NEXT:    lwz r4, 52(r1)
 ; CHECK-NEXT:    # implicit-def: $cr5lt
 ; CHECK-NEXT:    mfocrf r3, 4
 ; CHECK-NEXT:    rlwimi r3, r4, 12, 20, 20
@@ -94,16 +101,18 @@
 ; CHECK-BE-NEXT:    mfcr r12
 ; CHECK-BE-NEXT:    std r0, 16(r1)
 ; CHECK-BE-NEXT:    stw r12, 8(r1)
-; CHECK-BE-NEXT:    stdu r1, -160(r1)
-; CHECK-BE-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-BE-NEXT:    stdu r1, -176(r1)
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-NEXT:    .cfi_offset r28, -32
 ; CHECK-BE-NEXT:    .cfi_offset r29, -24
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
-; CHECK-BE-NEXT:    std r29, 136(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, 144(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r28, 144(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    bl call_2
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:    bc 12, 4*cr5+lt, .LBB0_13
@@ -115,7 +124,7 @@
 ; CHECK-BE-NEXT:    # implicit-def: $r29
 ; CHECK-BE-NEXT:    crnot 4*cr5+lt, 4*cr3+eq
 ; CHECK-BE-NEXT:    setnbc r3, 4*cr5+lt
-; CHECK-BE-NEXT:    stw r3, 132(r1)
+; CHECK-BE-NEXT:    stw r3, 140(r1)
 ; CHECK-BE-NEXT:    lwz r3, 0(r3)
 ; CHECK-BE-NEXT:    cmpwi cr4, r3, 0
 ; CHECK-BE-NEXT:    .p2align 4
@@ -136,17 +145,18 @@
 ; CHECK-BE-NEXT:    bc 12, 4*cr3+eq, .LBB0_9
 ; CHECK-BE-NEXT:  # %bb.6: # %bb32
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 22
-; CHECK-BE-NEXT:    andi. r3, r29, 2
+; CHECK-BE-NEXT:    rlwinm. r3, r29, 0, 30, 30
+; CHECK-BE-NEXT:    rlwinm r28, r29, 0, 24, 22
 ; CHECK-BE-NEXT:    mcrf cr2, cr0
 ; CHECK-BE-NEXT:    bl call_4
 ; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mr r29, r28
 ; CHECK-BE-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.7: # %bb37
 ; CHECK-BE-NEXT:  .LBB0_8: # %bb22
 ; CHECK-BE-NEXT:  .LBB0_9: # %bb35
 ; CHECK-BE-NEXT:  .LBB0_10: # %bb27
-; CHECK-BE-NEXT:    lwz r4, 132(r1)
+; CHECK-BE-NEXT:    lwz r4, 140(r1)
 ; CHECK-BE-NEXT:    # implicit-def: $cr5lt
 ; CHECK-BE-NEXT:    mfocrf r3, 4
 ; CHECK-BE-NEXT:    rlwimi r3, r4, 12, 20, 20
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -66,8 +66,7 @@
 ; CHECK-NEXT:    crnot 4*cr2+eq, eq
 ; CHECK-NEXT:    bl call_2@notoc
 ; CHECK-NEXT:    mr r27, r3
-; CHECK-NEXT:    srwi r3, r28, 4
-; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    rlwinm. r3, r28, 28, 31, 31
 ; CHECK-NEXT:    crmove 4*cr2+gt, gt
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb9
@@ -75,8 +74,7 @@
 ; CHECK-NEXT:    mr r4, r30
 ; CHECK-NEXT:    bl call_3@notoc
 ; CHECK-NEXT:  .LBB0_2: # %bb12
-; CHECK-NEXT:    srwi r3, r28, 7
-; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    rlwinm. r3, r28, 25, 31, 31
 ; CHECK-NEXT:    crmove 4*cr2+un, gt
 ; CHECK-NEXT:    bc 12, 4*cr2+eq, .LBB0_7
 ; CHECK-NEXT:  # %bb.3: # %bb37
@@ -214,8 +212,7 @@
 ; CHECK-BE-NEXT:    bl call_2
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:    mr r27, r3
-; CHECK-BE-NEXT:    srwi r3, r28, 4
-; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    rlwinm. r3, r28, 28, 31, 31
 ; CHECK-BE-NEXT:    crmove 4*cr2+gt, gt
 ; CHECK-BE-NEXT:    bc 12, 4*cr5+lt, .LBB0_2
 ; CHECK-BE-NEXT:  # %bb.1: # %bb9
@@ -224,8 +221,7 @@
 ; CHECK-BE-NEXT:    bl call_3
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:  .LBB0_2: # %bb12
-; CHECK-BE-NEXT:    srwi r3, r28, 7
-; CHECK-BE-NEXT:    andi. r3, r3, 1
+; CHECK-BE-NEXT:    rlwinm. r3, r28, 25, 31, 31
 ; CHECK-BE-NEXT:    crmove 4*cr2+un, gt
 ; CHECK-BE-NEXT:    bc 12, 4*cr2+eq, .LBB0_7
 ; CHECK-BE-NEXT:  # %bb.3: # %bb37
diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
--- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
@@ -113,8 +113,7 @@
 ; CHECK-NEXT:    xvtdivdp cr0, v2, v3
 ; CHECK-NEXT:    li r4, 222
 ; CHECK-NEXT:    mfocrf r3, 128
-; CHECK-NEXT:    srwi r3, r3, 28
-; CHECK-NEXT:    andi. r3, r3, 2
+; CHECK-NEXT:    rlwinm. r3, r3, 4, 30, 30
 ; CHECK-NEXT:    li r3, 22
 ; CHECK-NEXT:    iseleq r3, r4, r3
 ; CHECK-NEXT:    blr