diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1090,6 +1090,76 @@
         ++NumRotatesCollapsed;
         break;
       }
+      case PPC::ANDI_rec:
+      case PPC::ANDI8_rec: {
+        // We can eliminate RLDICL or RLDICR if it's used to clear the
+        // high-order or low-order n bits and all bits cleared will be ANDed
+        // with 0 by ANDI_rec, e.g.
+        //   %1 = RLDICL %0, 0, 32 ; clears the 32 high-order bits
+        //   %2 = ANDI8_rec %1, 1  ; the immediate clears them again
+        // becomes
+        //   %2 = ANDI8_rec %0, 1
+        Register UseReg = MI.getOperand(1).getReg();
+        Register TrueReg = TRI->lookThruCopyLike(UseReg, MRI);
+        if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
+        if (!SrcMI)
+          break;
+
+        unsigned SrcOpCode = SrcMI->getOpcode();
+        if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
+          break;
+
+        // With a non-zero shift the rotate is more than a bit clear.
+        if (SrcMI->getOperand(2).getImm() != 0)
+          break;
+
+        // MI will read the rotate's input directly, so that register must be
+        // virtual and of the same register class as the operand it replaces.
+        // Looking through a sub-register copy would otherwise hand a 64-bit
+        // register to the 32-bit ANDI_rec.
+        Register NewSrcReg = SrcMI->getOperand(1).getReg();
+        if (!UseReg.isVirtual() || !NewSrcReg.isVirtual() ||
+            MRI->getRegClassOrNull(NewSrcReg) !=
+                MRI->getRegClassOrNull(UseReg))
+          break;
+
+        uint64_t AndImm = MI.getOperand(2).getImm();
+        uint64_t MaskSrc, MaskMI;
+        if (SrcOpCode == PPC::RLDICL) {
+          // RLDICL can be used to clear the high-order n bits by setting SH=0
+          // and MB=n.
+          MaskSrc = SrcMI->getOperand(3).getImm();
+          MaskMI = llvm::countl_zero(AndImm);
+        } else {
+          // RLDICR can be used to clear the low-order n bits by setting SH=0
+          // and ME=63-n.
+          MaskSrc = 63 - SrcMI->getOperand(3).getImm();
+          MaskMI = llvm::countr_zero(AndImm);
+        }
+        // Give up if the rotate clears a bit that the AND immediate keeps.
+        if (MaskSrc > MaskMI)
+          break;
+
+        LLVM_DEBUG(dbgs() << "Combining pair: ");
+        LLVM_DEBUG(SrcMI->dump());
+        LLVM_DEBUG(MI.dump());
+        MI.getOperand(1).setReg(NewSrcReg);
+        // NewSrcReg is now read at MI, so earlier kill flags may be stale.
+        MRI->clearKillFlags(NewSrcReg);
+        LLVM_DEBUG(dbgs() << "To: ");
+        LLVM_DEBUG(MI.dump());
+
+        // Erase the rotate only once nothing reads its result. If MI reached
+        // it through a copy, that copy still uses TrueReg; both are then left
+        // behind as dead code instead of leaving a use without a def.
+        if (MRI->use_empty(TrueReg))
+          SrcMI->eraseFromParent();
+        Simplified = true;
+        break;
+      }
       // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
       // always trap, we will delete the node if it will never trap.
case PPC::TDI: diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -2204,8 +2204,7 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; CHECK-LABEL: getvelsl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: clrldi r3, r5, 32 -; CHECK-NEXT: andi. r3, r3, 1 +; CHECK-NEXT: andi. r3, r5, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2225,7 +2224,6 @@ ; ; CHECK-AIX-LABEL: getvelsl: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2242,8 +2240,7 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; CHECK-LABEL: getvelul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: clrldi r3, r5, 32 -; CHECK-NEXT: andi. r3, r3, 1 +; CHECK-NEXT: andi. r3, r5, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2263,7 +2260,6 @@ ; ; CHECK-AIX-LABEL: getvelul: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3 @@ -2461,8 +2457,7 @@ define double @getveld(<2 x double> %vd, i32 signext %i) { ; CHECK-LABEL: getveld: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: clrldi r3, r5, 32 -; CHECK-NEXT: andi. r3, r3, 1 +; CHECK-NEXT: andi. r3, r5, 1 ; CHECK-NEXT: sldi r3, r3, 3 ; CHECK-NEXT: lvsl v3, 0, r3 ; CHECK-NEXT: vperm v2, v2, v2, v3 @@ -2484,7 +2479,6 @@ ; ; CHECK-AIX-LABEL: getveld: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: clrldi 3, 3, 32 ; CHECK-AIX-NEXT: andi. 3, 3, 1 ; CHECK-AIX-NEXT: sldi 3, 3, 3 ; CHECK-AIX-NEXT: lvsl 3, 0, 3