diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -347,6 +347,7 @@ bool tryAsSingleRLWINM(SDNode *N); bool tryAsSingleRLWINM8(SDNode *N); bool tryAsSingleRLWIMI(SDNode *N); + bool tryAsPairOfRLDICL(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -4438,6 +4439,84 @@ return false; } +bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; + + // Do nothing if it is 16-bit imm as the pattern in the .td file handle + // it well with "andi.". + if (isUInt<16>(Imm64)) + return false; + + unsigned MB, ME; + SDLoc Loc(N); + SDValue Val = N->getOperand(0); + if (isRunOfOnes64(Imm64, MB, ME)) { + + // Optimize it with two rldicl instructions. + // MB ME MB+63-ME + // +----------------------+ +----------------------+ + // |0000001111111111111000| -> |0000000001111111111111| + // +----------------------+ +----------------------+ + // 0 64 0 64 + // Left rotate ME + 1 bit first and then, mask with (MB + 63 - ME, 63), + // and then, rotate back. + Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, + getI64Imm(ME + 1, Loc), + getI64Imm((MB + 63 - ME) & 63, Loc)), + 0); + SDValue Ops[] = {Val, getI64Imm(63 - ME, Loc), getI64Imm(0, Loc)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; + } + + // We can do special handling for pattern like this. + // MB MB2 ME2 ME + // +----------------------+ + // |0001111100000011111000| + // +----------------------+ + MB = countLeadingZeros(Imm64); + ME = 63 - countTrailingZeros(Imm64); + if (ME != 63) + return false; + + // Get the invert mask of [MB, ME]. + uint64_t InvertMask = + ~Imm64 & APInt::getBitsSet(64, 63 - ME, 64 - MB).getZExtValue(); + unsigned MB2, ME2; + if (!isRunOfOnes64(InvertMask, MB2, ME2)) + return false; + // Correct the MB2 and ME2 to be the position of last '1' of the first ones, + // and the position of first '1' of the second ones. + MB2--; + ME2++; + assert(MB2 <= ME2 && "Invalid MB and ME"); + + // As the number of trailing zeros is zero, we can optimize it with two + // rldicl. + // MB MB2 ME2 ME MB MB2 ME2 ME + // +----------------------+ +----------------------+ + // |1111111111111111111111| -> |0000001111111111111111+ + // +----------------------+ +----------------------+ + // 0 32 64 0 32 64 + // Rotate left MB2 + 1 bits and then, clear the bits (MB2, ME2) + Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, + {Val, getI64Imm(MB2 + 1, Loc), + getI64Imm(ME2 - MB2 - 1, Loc)}), + 0); + // MB MB2 ME2 ME MB MB2 ME2 ME + // +----------------------+ +----------------------+ + // |0000001111111111111111| -> |0001111100000011111111+ + // +----------------------+ +----------------------+ + // 0 32 64 0 32 64 + // Rotate back and then, clear the bits [0, MB) + SDValue Ops[] = {Val, getI64Imm(63 - MB2, Loc), getI64Imm(MB, Loc)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; +} + bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); unsigned Imm; @@ -4765,7 +4844,7 @@ case ISD::AND: // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || - tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N)) + tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) return; // Other cases are autogenerated. diff --git a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll --- a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll +++ b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll @@ -7,8 +7,8 @@ %typ = type { i32, i32 } ; On release builds, it doesn't crash, spewing nonsense instead. -; To make sure it works, check that and is still alive. -; CHECK: and +; To make sure it works, check that rldicl is still alive. +; CHECK: rldicl ; Also, in release, it emits a COPY from a 32-bit register to ; a 64-bit register, which happens to be emitted as cror [!] ; by the confused CodeGen. Just to be sure, check there isn't one. diff --git a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll --- a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll @@ -43,15 +43,13 @@ ; PPC64-LINUX-LABEL: f1 ; PPC64-LINUX: std 31, -8(1) ; PPC64-LINUX-NEXT: stdu 1, -64(1) -; PPC64-LINUX-NEXT: lis 4, 32767 ; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30 -; PPC64-LINUX-NEXT: ori 4, 4, 65535 -; PPC64-LINUX-NEXT: addi 3, 3, 15 -; PPC64-LINUX-NEXT: sldi 4, 4, 4 ; PPC64-LINUX-NEXT: mr 31, 1 -; PPC64-LINUX-NEXT: and 3, 3, 4 -; PPC64-LINUX-NEXT: neg 3, 3 +; PPC64-LINUX-NEXT: addi 3, 3, 15 +; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 33 ; PPC64-LINUX-NEXT: addi 4, 31, 64 +; PPC64-LINUX-NEXT: rotldi 3, 3, 4 +; PPC64-LINUX-NEXT: neg 3, 3 ; PPC64-LINUX-NEXT: stdux 4, 1, 3 ; The linkage area is always put on the top of the stack. @@ -82,14 +80,12 @@ ; PPC64-AIX-LABEL: f1 ; PPC64-AIX: std 31, -8(1) ; PPC64-AIX-NEXT: stdu 1, -64(1) -; PPC64-AIX-NEXT: lis 4, 32767 ; PPC64-AIX-NEXT: rldic 3, 3, 2, 30 -; PPC64-AIX-NEXT: ori 4, 4, 65535 -; PPC64-AIX-NEXT: addi 3, 3, 15 -; PPC64-AIX-NEXT: sldi 4, 4, 4 ; PPC64-AIX-NEXT: mr 31, 1 -; PPC64-AIX-NEXT: and 3, 3, 4 +; PPC64-AIX-NEXT: addi 3, 3, 15 ; PPC64-AIX-NEXT: addi 4, 31, 64 +; PPC64-AIX-NEXT: rldicl 3, 3, 60, 33 +; PPC64-AIX-NEXT: rotldi 3, 3, 4 ; PPC64-AIX-NEXT: neg 3, 3 ; PPC64-AIX-NEXT: stdux 4, 1, 3 diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -15,8 +15,8 @@ define i64 @test2(i64 %a) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -7 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 2 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: blr %and = and i64 %a, -7 ret i64 %and @@ -26,10 +26,8 @@ define i64 @test3(i64 %a) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: lis 4, 1023 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: sldi 4, 4, 22 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 38 +; CHECK-NEXT: rotldi 3, 3, 22 ; CHECK-NEXT: blr %and = and i64 %a, 281474972516352 ret i64 %and @@ -39,10 +37,8 @@ define i64 @test4(i64 %a) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 12 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: ori 4, 4, 255 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 30, 26 +; CHECK-NEXT: rldicl 3, 3, 34, 28 ; CHECK-NEXT: blr %and = and i64 %a, 51539607807 ret i64 %and @@ -52,10 +48,8 @@ define i64 @test5(i64 %a) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: oris 4, 4, 65472 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 6 +; CHECK-NEXT: rldicl 3, 3, 22, 32 ; CHECK-NEXT: blr %and = and i64 %a, 4290838527 ret i64 %and @@ -77,11 +71,8 @@ define i64 @test7(i64 %a) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -32767 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: oris 4, 4, 65024 -; CHECK-NEXT: rldicr 4, 4, 17, 63 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 22, 25 +; CHECK-NEXT: rldicl 3, 3, 42, 14 ; CHECK-NEXT: blr %and = and i64 %a, 1121501860462591 ret i64 %and diff --git a/llvm/test/CodeGen/PowerPC/cmpb.ll b/llvm/test/CodeGen/PowerPC/cmpb.ll --- a/llvm/test/CodeGen/PowerPC/cmpb.ll +++ b/llvm/test/CodeGen/PowerPC/cmpb.ll @@ -123,11 +123,9 @@ ret i32 %or55 ; CHECK-LABEL: @test32p1 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } @@ -147,11 +145,9 @@ ret i32 %or37 ; CHECK-LABEL: @test32p2 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -481,9 +481,9 @@ define i1 @or_icmps_const_1bit_diff(i64 %x) { ; CHECK-LABEL: or_icmps_const_1bit_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -5 ; CHECK-NEXT: addi 3, 3, -13 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 1 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: cntlzd 3, 3 ; CHECK-NEXT: rldicl 3, 3, 58, 63 ; CHECK-NEXT: blr