diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -351,6 +351,7 @@
     bool tryAsSingleRLWINM(SDNode *N);
     bool tryAsSingleRLWINM8(SDNode *N);
     bool tryAsSingleRLWIMI(SDNode *N);
+    bool tryAsPairOfRLDICL(SDNode *N);
 
     void PeepholePPC64();
     void PeepholePPC64ZExt();
@@ -4439,6 +4440,64 @@
   return false;
 }
 
+/// Try to select an AND of a 64-bit value with a constant mask as a pair of
+/// RLDICL instructions. This succeeds when the mask, after its leading zeros
+/// are filled in with ones, is accepted by isRunOfOnes64(). Returns true if
+/// \p N was selected here, false if it was left for other selection code.
+bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+  uint64_t Imm64;
+  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+    return false;
+
+  // Do nothing if it is a 16-bit imm, as the pattern in the .td file handles
+  // it well with "andi.".
+  if (isUInt<16>(Imm64))
+    return false;
+
+  SDLoc Loc(N);
+  SDValue Val = N->getOperand(0);
+
+  // Optimized with two rldicl's as follows:
+  // Add missing bits on left to the mask and check that the mask is a
+  // wrapped run of ones, i.e.
+  // Change pattern |0001111100000011111111|
+  //             to |1111111100000011111111|.
+  unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
+  if (NumOfLeadingZeros != 0)
+    Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
+
+  unsigned MB, ME;
+  if (!isRunOfOnes64(Imm64, MB, ME))
+    return false;
+
+  //         ME     MB                   MB-ME+63
+  // +----------------------+     +----------------------+
+  // |1111111100000011111111| ->  |0000001111111111111111|
+  // +----------------------+     +----------------------+
+  //  0                    63      0                    63
+  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
+  unsigned OnesOnLeft = ME + 1;
+  unsigned ZerosInBetween = (MB - ME + 63) & 63;
+  // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
+  // on the left the bits that are already zeros in the mask.
+  Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
+                                       getI64Imm(OnesOnLeft, Loc),
+                                       getI64Imm(ZerosInBetween, Loc)),
+                0);
+  //        MB-ME+63                      ME     MB
+  // +----------------------+     +----------------------+
+  // |0000001111111111111111| ->  |0001111100000011111111|
+  // +----------------------+     +----------------------+
+  //  0                    63      0                    63
+  // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
+  // left the number of ones we previously added.
+  SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
+                   getI64Imm(NumOfLeadingZeros, Loc)};
+  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+  return true;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   unsigned Imm;
@@ -4766,7 +4825,7 @@
   case ISD::AND:
     // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
     if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
-        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
+        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
       return;
 
     // Other cases are autogenerated.
diff --git a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
--- a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
@@ -7,8 +7,8 @@
 %typ = type { i32, i32 }
 
 ; On release builds, it doesn't crash, spewing nonsense instead.
-; To make sure it works, check that and is still alive.
-; CHECK: and
+; To make sure it works, check that rldicl is still alive.
+; CHECK: rldicl
 ; Also, in release, it emits a COPY from a 32-bit register to
 ; a 64-bit register, which happens to be emitted as cror [!]
 ; by the confused CodeGen. Just to be sure, check there isn't one.
diff --git a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll --- a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll @@ -43,15 +43,13 @@ ; PPC64-LINUX-LABEL: f1 ; PPC64-LINUX: std 31, -8(1) ; PPC64-LINUX-NEXT: stdu 1, -64(1) -; PPC64-LINUX-NEXT: lis 4, 32767 ; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30 -; PPC64-LINUX-NEXT: ori 4, 4, 65535 -; PPC64-LINUX-NEXT: addi 3, 3, 15 -; PPC64-LINUX-NEXT: sldi 4, 4, 4 ; PPC64-LINUX-NEXT: mr 31, 1 -; PPC64-LINUX-NEXT: and 3, 3, 4 -; PPC64-LINUX-NEXT: neg 3, 3 +; PPC64-LINUX-NEXT: addi 3, 3, 15 +; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4 ; PPC64-LINUX-NEXT: addi 4, 31, 64 +; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29 +; PPC64-LINUX-NEXT: neg 3, 3 ; PPC64-LINUX-NEXT: stdux 4, 1, 3 ; The linkage area is always put on the top of the stack. @@ -82,14 +80,12 @@ ; PPC64-AIX-LABEL: f1 ; PPC64-AIX: std 31, -8(1) ; PPC64-AIX-NEXT: stdu 1, -64(1) -; PPC64-AIX-NEXT: lis 4, 32767 ; PPC64-AIX-NEXT: rldic 3, 3, 2, 30 -; PPC64-AIX-NEXT: ori 4, 4, 65535 -; PPC64-AIX-NEXT: addi 3, 3, 15 -; PPC64-AIX-NEXT: sldi 4, 4, 4 ; PPC64-AIX-NEXT: mr 31, 1 -; PPC64-AIX-NEXT: and 3, 3, 4 +; PPC64-AIX-NEXT: addi 3, 3, 15 ; PPC64-AIX-NEXT: addi 4, 31, 64 +; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4 +; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29 ; PPC64-AIX-NEXT: neg 3, 3 ; PPC64-AIX-NEXT: stdux 4, 1, 3 diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -15,8 +15,8 @@ define i64 @test2(i64 %a) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -7 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 2 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: blr %and = and i64 %a, -7 ret i64 %and @@ -26,10 +26,8 @@ define i64 @test3(i64 %a) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: lis 4, 1023 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: sldi 4, 4, 
22 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 22 +; CHECK-NEXT: rldicl 3, 3, 22, 16 ; CHECK-NEXT: blr %and = and i64 %a, 281474972516352 ret i64 %and @@ -39,10 +37,8 @@ define i64 @test4(i64 %a) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 12 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: ori 4, 4, 255 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 30, 26 +; CHECK-NEXT: rldicl 3, 3, 34, 28 ; CHECK-NEXT: blr %and = and i64 %a, 51539607807 ret i64 %and @@ -52,10 +48,8 @@ define i64 @test5(i64 %a) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: oris 4, 4, 65472 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 6 +; CHECK-NEXT: rldicl 3, 3, 22, 32 ; CHECK-NEXT: blr %and = and i64 %a, 4290838527 ret i64 %and @@ -77,11 +71,8 @@ define i64 @test7(i64 %a) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -32767 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: oris 4, 4, 65024 -; CHECK-NEXT: rldicr 4, 4, 17, 63 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 22, 25 +; CHECK-NEXT: rldicl 3, 3, 42, 14 ; CHECK-NEXT: blr %and = and i64 %a, 1121501860462591 ret i64 %and diff --git a/llvm/test/CodeGen/PowerPC/cmpb.ll b/llvm/test/CodeGen/PowerPC/cmpb.ll --- a/llvm/test/CodeGen/PowerPC/cmpb.ll +++ b/llvm/test/CodeGen/PowerPC/cmpb.ll @@ -123,11 +123,9 @@ ret i32 %or55 ; CHECK-LABEL: @test32p1 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } @@ -147,11 +145,9 @@ ret i32 %or37 ; CHECK-LABEL: @test32p2 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 
3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -481,9 +481,9 @@ define i1 @or_icmps_const_1bit_diff(i64 %x) { ; CHECK-LABEL: or_icmps_const_1bit_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -5 ; CHECK-NEXT: addi 3, 3, -13 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 1 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: cntlzd 3, 3 ; CHECK-NEXT: rldicl 3, 3, 58, 63 ; CHECK-NEXT: blr