diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1197,6 +1197,73 @@
         combineSEXTAndSHL(MI, ToErase);
         break;
       }
+      case PPC::ANDI_rec:
+      case PPC::ANDI8_rec:
+      case PPC::ANDIS_rec:
+      case PPC::ANDIS8_rec: {
+        // Fold a preceding RLDICL/RLDICR into this record-form AND-immediate
+        // when the rotate-and-clear cannot change the AND's result.
+        Register DstReg = MI.getOperand(1).getReg();
+        Register TrueReg = TRI->lookThruCopyLike(DstReg, MRI);
+        if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
+        if (!SrcMI)
+          break;
+
+        unsigned SrcOpCode = SrcMI->getOpcode();
+        if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
+          break;
+
+        // The fold makes the AND read the rotate's input directly. Looking
+        // through copies can pair the AND with a register of another class,
+        // so only proceed when both virtual registers share a register class.
+        Register SrcReg = SrcMI->getOperand(1).getReg();
+        if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
+            MRI->getRegClassOrNull(SrcReg) != MRI->getRegClassOrNull(DstReg))
+          break;
+
+        uint64_t AndImm = MI.getOperand(2).getImm();
+        if (MI.getOpcode() == PPC::ANDIS_rec ||
+            MI.getOpcode() == PPC::ANDIS8_rec)
+          AndImm <<= 16;
+        uint64_t LZeroAndImm = llvm::countl_zero(AndImm);
+        uint64_t RZeroAndImm = llvm::countr_zero(AndImm);
+        uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
+
+        // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
+        // if all bits to AND are already zero in the input.
+        bool PatternResultZero =
+            (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
+            (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
+
+        // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
+        // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
+        bool PatternRemoveRotate =
+            SrcMI->getOperand(2).getImm() == 0 &&
+            ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
+             (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
+
+        if (!PatternResultZero && !PatternRemoveRotate)
+          break;
+
+        LLVM_DEBUG(dbgs() << "Combining pair: ");
+        LLVM_DEBUG(SrcMI->dump());
+        LLVM_DEBUG(MI.dump());
+        if (PatternResultZero)
+          MI.getOperand(2).setImm(0);
+        MI.getOperand(1).setReg(SrcReg);
+        addRegToUpdate(SrcReg);
+        LLVM_DEBUG(dbgs() << "To: ");
+        LLVM_DEBUG(MI.dump());
+        Simplified = true;
+        // Only the rotate's direct consumer may erase it: when the operand was
+        // reached through copies, those copies still read the rotate's result.
+        if (TrueReg == DstReg)
+          SrcMI->eraseFromParent();
+        break;
+      }
       case PPC::RLWINM:
       case PPC::RLWINM_rec:
       case PPC::RLWINM8:
diff --git a/llvm/test/CodeGen/PowerPC/fold-rot-and-peephole.mir b/llvm/test/CodeGen/PowerPC/fold-rot-and-peephole.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-rot-and-peephole.mir
@@ -0,0 +1,156 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir \
+# RUN: -verify-machineinstrs -run-pass ppc-mi-peepholes < %s | FileCheck %s
+
+---
+name: fold_RLDICL_ANDI
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICL_ANDI
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 1, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICL %1:g8rc, 0, 32 ; shift 0; countl_zero(1)=63 >= 32 -> PatternRemoveRotate, immediate kept
+    %3:g8rc = ANDI8_rec killed %2:g8rc, 1, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICL_ANDI2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICL_ANDI2
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 0, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICL %1:g8rc, 10, 60 ; countr_zero(32)=5 and 5+60 > 63 -> PatternResultZero, immediate rewritten to 0
+    %3:g8rc = ANDI8_rec killed %2:g8rc, 32, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICR_ANDI
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICR_ANDI
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 16, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICR %1:g8rc, 0, 60 ; shift 0; countr_zero(16)=4 and 4+60 > 63 -> PatternRemoveRotate, immediate kept
+    %3:g8rc = ANDI8_rec killed %2:g8rc, 16, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICR_ANDI2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICR_ANDI2
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDI8_rec:%[0-9]+]]:g8rc = ANDI8_rec killed [[COPY]], 0, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDI8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICR %1:g8rc, 10, 60 ; countl_zero(1)=63 > 60 -> PatternResultZero, immediate rewritten to 0
+    %3:g8rc = ANDI8_rec killed %2:g8rc, 1, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICL_ANDIS
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICL_ANDIS
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 1, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICL %1:g8rc, 0, 32 ; shift 0; ANDIS mask is 1<<16, countl_zero=47 >= 32 -> PatternRemoveRotate, immediate kept
+    %3:g8rc = ANDIS8_rec killed %2:g8rc, 1, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICL_ANDIS2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICL_ANDIS2
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 0, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICL %1:g8rc, 10, 60 ; ANDIS mask is 16<<16, countr_zero=20 and 20+60 > 63 -> PatternResultZero, immediate rewritten to 0
+    %3:g8rc = ANDIS8_rec killed %2:g8rc, 16, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICR_ANDIS
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICR_ANDIS
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 16, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICR %1:g8rc, 0, 60 ; shift 0; ANDIS mask is 16<<16, countr_zero=20 and 20+60 > 63 -> PatternRemoveRotate, immediate kept
+    %3:g8rc = ANDIS8_rec killed %2:g8rc, 16, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+---
+name: fold_RLDICR_ANDIS2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    ; CHECK-LABEL: name: fold_RLDICR_ANDIS2
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY killed $x3
+    ; CHECK-NEXT: [[ANDIS8_rec:%[0-9]+]]:g8rc = ANDIS8_rec killed [[COPY]], 0, implicit-def dead $cr0
+    ; CHECK-NEXT: $x3 = COPY killed [[ANDIS8_rec]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    %1:g8rc = COPY $x3
+    %2:g8rc = RLDICR %1:g8rc, 10, 32 ; ANDIS mask is 1<<16, countl_zero=47 > 32 -> PatternResultZero, immediate rewritten to 0
+    %3:g8rc = ANDIS8_rec killed %2:g8rc, 1, implicit-def dead $cr0
+    $x3 = COPY %3:g8rc
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2204,8 +2204,7 @@
 define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
 ; CHECK-LABEL: getvelsl:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: clrldi r3, r5, 32
-; CHECK-NEXT: andi. r3, r3, 1
+; CHECK-NEXT: andi. r3, r5, 1
 ; CHECK-NEXT: sldi r3, r3, 3
 ; CHECK-NEXT: lvsl v3, 0, r3
 ; CHECK-NEXT: vperm v2, v2, v2, v3
@@ -2225,7 +2224,6 @@
 ;
 ; CHECK-AIX-LABEL: getvelsl:
 ; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: clrldi 3, 3, 32
 ; CHECK-AIX-NEXT: andi. 3, 3, 1
 ; CHECK-AIX-NEXT: sldi 3, 3, 3
 ; CHECK-AIX-NEXT: lvsl 3, 0, 3
@@ -2242,8 +2240,7 @@
 define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
 ; CHECK-LABEL: getvelul:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: clrldi r3, r5, 32
-; CHECK-NEXT: andi. r3, r3, 1
+; CHECK-NEXT: andi. r3, r5, 1
 ; CHECK-NEXT: sldi r3, r3, 3
 ; CHECK-NEXT: lvsl v3, 0, r3
 ; CHECK-NEXT: vperm v2, v2, v2, v3
@@ -2263,7 +2260,6 @@
 ;
 ; CHECK-AIX-LABEL: getvelul:
 ; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: clrldi 3, 3, 32
 ; CHECK-AIX-NEXT: andi. 3, 3, 1
 ; CHECK-AIX-NEXT: sldi 3, 3, 3
 ; CHECK-AIX-NEXT: lvsl 3, 0, 3
@@ -2461,8 +2457,7 @@
 define double @getveld(<2 x double> %vd, i32 signext %i) {
 ; CHECK-LABEL: getveld:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: clrldi r3, r5, 32
-; CHECK-NEXT: andi. r3, r3, 1
+; CHECK-NEXT: andi. r3, r5, 1
 ; CHECK-NEXT: sldi r3, r3, 3
 ; CHECK-NEXT: lvsl v3, 0, r3
 ; CHECK-NEXT: vperm v2, v2, v2, v3
@@ -2484,7 +2479,6 @@
 ;
 ; CHECK-AIX-LABEL: getveld:
 ; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: clrldi 3, 3, 32
 ; CHECK-AIX-NEXT: andi. 3, 3, 1
 ; CHECK-AIX-NEXT: sldi 3, 3, 3
 ; CHECK-AIX-NEXT: lvsl 3, 0, 3