diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -82,6 +82,30 @@ return false; } +static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { + if (!Val) + return false; + + if (isShiftedMask_64(Val)) { + // look for the first non-zero bit + MB = countLeadingZeros(Val); + // look for the first zero bit after the run of ones + ME = countLeadingZeros((Val - 1) ^ Val); + return true; + } else { + Val = ~Val; // invert mask + if (isShiftedMask_64(Val)) { + // effectively look for the first zero bit + ME = countLeadingZeros(Val) - 1; + // effectively look for the first one bit after the run of zeros + MB = countLeadingZeros((Val - 1) ^ Val) + 1; + return true; + } + } + // no run present + return false; +} + } // end namespace llvm // Generated files will use "namespace PPC". To avoid symbol clash, diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4456,6 +4456,26 @@ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); return true; } + + // It is not 16-bit imm that means we need two instructions at least if + // using "and" instruction. Try to exploit it with rotate mask instructions. + if (isRunOfOnes64(Imm64, MB, ME)) { + if (MB >= 32 && MB <= ME) { + // MB ME + // +----------------------+ + // |xxxxxxxxxxx00011111000| + // +----------------------+ + // 0 32 64 + // We can only do it if the MB is larger than 32 and MB <= ME + // as RLWINM will replace the content of [0 - 32) with [32 - 64) even + // we didn't rotate it. + SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl), + getI64Imm(ME - 32, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); + return true; + } + // TODO - handle it with rldicl + rldicl + } } return false; diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -5,10 +5,7 @@ define i32 @test1(i32 %a) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: lis 4, 32767 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: sldi 4, 4, 1 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rlwinm 3, 3, 0, 0, 30 ; CHECK-NEXT: blr %and = and i32 %a, -2 ret i32 %and diff --git a/llvm/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll b/llvm/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll --- a/llvm/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll +++ b/llvm/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll @@ -7,12 +7,12 @@ %2 = zext i32 %1 to i64 %3 = shl i64 %2, 48 %4 = ashr exact i64 %3, 48 -; CHECK: ANDI8o killed {{[^,]+}}, 65520, implicit-def dead $cr0 +; CHECK: RLWINM8 killed {{[^,]+}}, 0, 16, 27 ; CHECK: CMPLDI ; CHECK: BCC ; CHECK: ANDI8o {{[^,]+}}, 65520, implicit-def $cr0 -; CHECK: COPY $cr0 +; CHECK: COPY killed $cr0 ; CHECK: BCC %5 = icmp eq i64 %4, 0 br i1 %5, label %foo, label %bar diff --git a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll --- a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll +++ b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll @@ -299,7 +299,7 @@ ; FAST: # %bb.0: ; FAST-NEXT: clrldi 3, 3, 48 ; FAST-NEXT: popcntd 3, 3 -; FAST-NEXT: andi. 3, 3, 16 +; FAST-NEXT: rlwinm 3, 3, 0, 27, 27 ; FAST-NEXT: blr ; ; SLOW-LABEL: popa_i16_i64: @@ -325,7 +325,7 @@ ; SLOW-NEXT: ori 4, 4, 257 ; SLOW-NEXT: mullw 3, 3, 4 ; SLOW-NEXT: srwi 3, 3, 24 -; SLOW-NEXT: andi. 3, 3, 16 +; SLOW-NEXT: rlwinm 3, 3, 0, 27, 27 ; SLOW-NEXT: blr %pop = call i16 @llvm.ctpop.i16(i16 %x) %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext