diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -348,6 +348,7 @@ bool tryAsSingleRLWINM8(SDNode *N); bool tryAsSingleRLWIMI(SDNode *N); bool tryAsPairOfRLDICL(SDNode *N); + bool tryAsPairOfRLWINM8(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -4473,6 +4474,67 @@ return true; } +bool PPCDAGToDAGISel::tryAsPairOfRLWINM8(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; + + // Do nothing if it is 16-bit imm as the pattern in the .td file handle + // it well with "andi.". + if (isUInt<16>(Imm64)) + return false; + + // We can do special handling for pattern like this. + // MB MB2 ME2 ME + // +----------------------+ + // |0001111100000011111000| + // +----------------------+ + unsigned MB = countLeadingZeros(Imm64); + unsigned ME = 63 - countTrailingZeros(Imm64); + // Get the invert mask of [MB, ME]. + uint64_t InvertMask = + ~Imm64 & APInt::getBitsSet(64, 63 - ME, 64 - MB).getZExtValue(); + unsigned MB2, ME2; + if (!isRunOfOnes64(InvertMask, MB2, ME2)) + return false; + // Correct the MB2 and ME2 to be the position of last '1' of the first ones, + // and the position of first '1' of the second ones. + MB2--; + ME2++; + assert(MB2 <= ME2 && "Invalid MB and ME"); + + if (MB < 32) + return false; + + // If it is within [32, 64), we can optimize it with two rlwinm. + // MB MB2 ME2 ME MB MB2 ME2 ME + // +----------------------+ +----------------------+ + // |1111111111111111111111| -> |1111111100000011111111+ + // +----------------------+ +----------------------+ + // 0 32 64 0 32 64 + // Clear the bit (MB2, ME2) first. It is a warpping mask. + // RLWINM 0, ME2, MB2 + SDLoc Loc(N); + SDValue Val = N->getOperand(0); + Val = SDValue( + CurDAG->getMachineNode(PPC::RLWINM8, Loc, MVT::i64, + {Val, getI64Imm(0, Loc), getI64Imm(ME2 - 32, Loc), + getI64Imm(MB2 - 32, Loc)}), + 0); + // MB MB2 ME2 ME MB MB2 ME2 ME + // +----------------------+ +----------------------+ + // |1111111100000011111111| -> |0001111100000011111000+ + // +----------------------+ +----------------------+ + // 0 32 64 0 32 64 + // Clear the bit (ME, MB). Notice that, as RLWINM will update the bit + // [0, 31], we can only do this optimization if MB >= 32. + SDValue Ops[] = {Val, getI64Imm(0, Loc), getI64Imm(MB - 32, Loc), + getI64Imm(ME - 32, Loc)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); + return true; +} + bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); unsigned Imm; @@ -4800,7 +4862,8 @@ case ISD::AND: // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || - tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) + tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N) || + tryAsPairOfRLWINM8(N)) return; // Other cases are autogenerated. diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -50,10 +50,8 @@ define i64 @test5(i64 %a) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: oris 4, 4, 65472 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rlwinm 3, 3, 0, 16, 9 +; CHECK-NEXT: rotlwi 3, 3, 0 ; CHECK-NEXT: blr %and = and i64 %a, 4290838527 ret i64 %and @@ -63,9 +61,8 @@ define i64 @test6(i64 %a) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: lis 4, 16320 -; CHECK-NEXT: ori 4, 4, 65504 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rlwinm 3, 3, 0, 16, 9 +; CHECK-NEXT: rlwinm 3, 3, 0, 2, 26 ; CHECK-NEXT: blr %and = and i64 %a, 1069613024 ret i64 %and diff --git a/llvm/test/CodeGen/PowerPC/cmpb.ll b/llvm/test/CodeGen/PowerPC/cmpb.ll --- a/llvm/test/CodeGen/PowerPC/cmpb.ll +++ b/llvm/test/CodeGen/PowerPC/cmpb.ll @@ -123,11 +123,9 @@ ret i32 %or55 ; CHECK-LABEL: @test32p1 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rlwinm [[REG2:[0-9]+]], [[REG1]], 0, 13, 7 +; CHECK: rotlwi 3, [[REG2]], 0 ; CHECK: blr } @@ -147,11 +145,9 @@ ret i32 %or37 ; CHECK-LABEL: @test32p2 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rlwinm [[REG2:[0-9]+]], [[REG1]], 0, 16, 7 +; CHECK: rotlwi 3, [[REG2]], 0 ; CHECK: blr }