diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -347,6 +347,7 @@
     bool tryAsSingleRLWINM(SDNode *N);
     bool tryAsSingleRLWINM8(SDNode *N);
     bool tryAsSingleRLWIMI(SDNode *N);
+    bool tryAsPairOfRLDICL(SDNode *N);
 
     void PeepholePPC64();
     void PeepholePPC64ZExt();
@@ -4438,6 +4439,82 @@
   return false;
 }
 
+/// Try to select an AND whose second operand is a 64-bit immediate mask as a
+/// pair of RLDICL instructions. Returns true if \p N was selected that way;
+/// returns false, leaving \p N untouched, if no handled mask shape applies.
+bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+  uint64_t Imm64;
+  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+    return false;
+
+  // Do nothing if it is a 16-bit immediate, as the pattern in the .td file
+  // handles it well with "andi.".
+  if (isUInt<16>(Imm64))
+    return false;
+
+  unsigned MB, ME;
+  SDLoc Loc(N);
+  SDValue Val = N->getOperand(0);
+  // Right rotate Src by RotR bits, and clear its left ClearL bits. RLDICL
+  // rotates left, hence the (64 - RotR) & 63 rotate amount.
+  auto RotateRightClearLeft = [&](SDValue Src, int RotR, int ClearL) {
+    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Src,
+                                          getI64Imm((64 - RotR) & 63, Loc),
+                                          getI64Imm(ClearL, Loc)),
+                   0);
+  };
+
+  if (isRunOfOnes64(Imm64, MB, ME)) {
+    // Optimize it with two rldicl instructions.
+    //        MB          ME                  MB+63-ME
+    // +----------------------+     +----------------------+
+    // |0000001111111111111000| ->  |0000000001111111111111|
+    // +----------------------+     +----------------------+
+    //  0                    63      0                    63
+    // Right rotate 63-ME bits first and clear the left MB+63-ME bits.
+    Val = RotateRightClearLeft(Val, 63 - ME, (MB + 63 - ME) & 63);
+    // Left rotate 63-ME bits to move the kept bits back into place.
+    SDValue Ops[] = {Val, getI64Imm(63 - ME, Loc), getI64Imm(0, Loc)};
+    CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+    return true;
+  }
+
+  // We can do special handling for pattern like this.
+  // +----------------------+
+  // |0001111100000011111111|
+  // +----------------------+
+  MB = countLeadingZeros(Imm64);
+  // Invert the mask within bits [MB, 63], i.e. below its leading zeros.
+  uint64_t InvertMask =
+      ~Imm64 & APInt::getBitsSet(64, 0, 64 - MB).getZExtValue();
+  unsigned MB2, ME2;
+  if (!isRunOfOnes64(InvertMask, MB2, ME2))
+    return false;
+  // Correct the MB2 and ME2 to be the position of last '1' of the first ones,
+  // and the position of first '1' of the second ones.
+  MB2--;
+  ME2++;
+  assert(MB2 < ME2 && "Invalid MB and ME");
+
+  //         MB2    ME2                  ME2-MB2-1
+  // +----------------------+     +----------------------+
+  // |xxxxxxxx------xxxxxxxx| ->  |000000xxxxxxxxxxxxxxxx|
+  // +----------------------+     +----------------------+
+  //  0                    63      0                    63
+  // Rotate right 63-MB2 bits and clear the left ME2-MB2-1 bits.
+  Val = RotateRightClearLeft(Val, 63 - MB2, ME2 - MB2 - 1);
+  //     MB  MB2    ME2               MB  MB2    ME2
+  // +----------------------+     +----------------------+
+  // |---xxxxx000000xxxxxxxx| ->  |000xxxxx000000xxxxxxxx|
+  // +----------------------+     +----------------------+
+  //  0                    63      0                    63
+  // Rotate left 63-MB2 bits to undo that rotate and clear the left MB bits.
+  SDValue Ops[] = {Val, getI64Imm(63 - MB2, Loc), getI64Imm(MB, Loc)};
+  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+  return true;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   unsigned Imm;
@@ -4765,7 +4842,7 @@
   case ISD::AND:
     // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
     if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
-        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
+        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
       return;
 
     // Other cases are autogenerated.
diff --git a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
--- a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
@@ -7,8 +7,8 @@
 %typ = type { i32, i32 }
 
 ; On release builds, it doesn't crash, spewing nonsense instead.
-; To make sure it works, check that and is still alive.
-; CHECK: and +; To make sure it works, check that rldicl is still alive. +; CHECK: rldicl ; Also, in release, it emits a COPY from a 32-bit register to ; a 64-bit register, which happens to be emitted as cror [!] ; by the confused CodeGen. Just to be sure, check there isn't one. diff --git a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll --- a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll @@ -43,15 +43,13 @@ ; PPC64-LINUX-LABEL: f1 ; PPC64-LINUX: std 31, -8(1) ; PPC64-LINUX-NEXT: stdu 1, -64(1) -; PPC64-LINUX-NEXT: lis 4, 32767 ; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30 -; PPC64-LINUX-NEXT: ori 4, 4, 65535 -; PPC64-LINUX-NEXT: addi 3, 3, 15 -; PPC64-LINUX-NEXT: sldi 4, 4, 4 ; PPC64-LINUX-NEXT: mr 31, 1 -; PPC64-LINUX-NEXT: and 3, 3, 4 -; PPC64-LINUX-NEXT: neg 3, 3 +; PPC64-LINUX-NEXT: addi 3, 3, 15 +; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 33 ; PPC64-LINUX-NEXT: addi 4, 31, 64 +; PPC64-LINUX-NEXT: rotldi 3, 3, 4 +; PPC64-LINUX-NEXT: neg 3, 3 ; PPC64-LINUX-NEXT: stdux 4, 1, 3 ; The linkage area is always put on the top of the stack. 
@@ -82,14 +80,12 @@ ; PPC64-AIX-LABEL: f1 ; PPC64-AIX: std 31, -8(1) ; PPC64-AIX-NEXT: stdu 1, -64(1) -; PPC64-AIX-NEXT: lis 4, 32767 ; PPC64-AIX-NEXT: rldic 3, 3, 2, 30 -; PPC64-AIX-NEXT: ori 4, 4, 65535 -; PPC64-AIX-NEXT: addi 3, 3, 15 -; PPC64-AIX-NEXT: sldi 4, 4, 4 ; PPC64-AIX-NEXT: mr 31, 1 -; PPC64-AIX-NEXT: and 3, 3, 4 +; PPC64-AIX-NEXT: addi 3, 3, 15 ; PPC64-AIX-NEXT: addi 4, 31, 64 +; PPC64-AIX-NEXT: rldicl 3, 3, 60, 33 +; PPC64-AIX-NEXT: rotldi 3, 3, 4 ; PPC64-AIX-NEXT: neg 3, 3 ; PPC64-AIX-NEXT: stdux 4, 1, 3 diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -15,8 +15,8 @@ define i64 @test2(i64 %a) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -7 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 2 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: blr %and = and i64 %a, -7 ret i64 %and @@ -26,10 +26,8 @@ define i64 @test3(i64 %a) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: lis 4, 1023 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: sldi 4, 4, 22 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 38 +; CHECK-NEXT: rotldi 3, 3, 22 ; CHECK-NEXT: blr %and = and i64 %a, 281474972516352 ret i64 %and @@ -39,10 +37,8 @@ define i64 @test4(i64 %a) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 12 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: ori 4, 4, 255 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 30, 26 +; CHECK-NEXT: rldicl 3, 3, 34, 28 ; CHECK-NEXT: blr %and = and i64 %a, 51539607807 ret i64 %and @@ -52,10 +48,8 @@ define i64 @test5(i64 %a) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: oris 4, 4, 65472 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 6 +; CHECK-NEXT: rldicl 3, 3, 22, 32 ; CHECK-NEXT: blr %and = and i64 %a, 4290838527 ret i64 %and @@ -77,11 +71,8 @@ define i64 @test7(i64 %a) { ; 
CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -32767 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: oris 4, 4, 65024 -; CHECK-NEXT: rldicr 4, 4, 17, 63 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 22, 25 +; CHECK-NEXT: rldicl 3, 3, 42, 14 ; CHECK-NEXT: blr %and = and i64 %a, 1121501860462591 ret i64 %and diff --git a/llvm/test/CodeGen/PowerPC/cmpb.ll b/llvm/test/CodeGen/PowerPC/cmpb.ll --- a/llvm/test/CodeGen/PowerPC/cmpb.ll +++ b/llvm/test/CodeGen/PowerPC/cmpb.ll @@ -123,11 +123,9 @@ ret i32 %or55 ; CHECK-LABEL: @test32p1 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } @@ -147,11 +145,9 @@ ret i32 %or37 ; CHECK-LABEL: @test32p2 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -481,9 +481,9 @@ define i1 @or_icmps_const_1bit_diff(i64 %x) { ; CHECK-LABEL: or_icmps_const_1bit_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -5 ; CHECK-NEXT: addi 3, 3, -13 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 1 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: cntlzd 3, 3 ; CHECK-NEXT: rldicl 3, 3, 58, 63 ; CHECK-NEXT: blr