diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -424,6 +424,7 @@
     bool tryFoldSWTestBRCC(SDNode *N);
     bool trySelectLoopCountIntrinsic(SDNode *N);
     bool tryAsSingleRLDICL(SDNode *N);
+    bool tryAsSingleRLDCL(SDNode *N);
     bool tryAsSingleRLDICR(SDNode *N);
     bool tryAsSingleRLWINM(SDNode *N);
    bool tryAsSingleRLWINM8(SDNode *N);
@@ -5084,6 +5085,35 @@
   return false;
 }
 
+bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+
+  uint64_t Imm64;
+  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
+    return false;
+
+  SDValue Val = N->getOperand(0);
+
+  if (Val.getOpcode() != ISD::ROTL)
+    return false;
+
+  // We want to avoid matching a situation like this one:
+  //   %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
+  //   %and1 = and i64 %2, 9223372036854775807
+  // Here we are trying to match RLDCL, but the DAG above rotates by a
+  // constant amount and is better matched by RLDICL, so bail out if the
+  // rotate amount is a constant.
+  SDValue RotateAmt = Val.getOperand(1);
+  if (RotateAmt.getOpcode() == ISD::Constant)
+    return false;
+
+  unsigned MB = 64 - llvm::countr_one(Imm64);
+  SDLoc dl(N);
+  SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
+  CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
+  return true;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   uint64_t Imm64;
@@ -5604,8 +5634,9 @@
   case ISD::AND:
-    // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
-    if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
-        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
+    // If this is an 'and' with a mask, try to emit rlwinm/rldcl/rldicl/rldicr.
+    if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
+        tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
+        tryAsPairOfRLDICL(N))
       return;
 
     // Other cases are autogenerated.
     break;
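For context, here is a minimal IR sketch of the pattern the new tryAsSingleRLDCL hook matches (the function name rldcl_example is hypothetical and not taken from the test file below): a variable-amount rotate (fshl with identical operands) whose result is ANDed with a contiguous low-bit mask. rldcl rotates left by the amount held in a register and clears the leftmost MB bits, so the mask 0x7FFFFFFFFFFFFFFF (63 trailing ones) gives MB = 64 - 63 = 1, matching the "rldcl r3, r3, r4, 1" lines in the updated test checks.

; Illustrative example only, not part of the patch.
define i64 @rldcl_example(i64 %word, i64 %amt) {
entry:
  ; fshl with equal operands is a rotate left by %amt (a non-constant amount).
  %rot = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %amt)
  ; 0x7FFFFFFFFFFFFFFF is a mask of 63 trailing ones, so MB = 1.
  %masked = and i64 %rot, 9223372036854775807
  ret i64 %masked
}

declare i64 @llvm.fshl.i64(i64, i64, i64)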
diff --git a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
--- a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
@@ -75,22 +75,19 @@
 ; AIX64-LABEL: rotatemask64:
 ; AIX64: # %bb.0: # %entry
 ; AIX64-NEXT: cntlzd r4, r3
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 1
+; AIX64-NEXT: rldcl r3, r3, r4, 1
 ; AIX64-NEXT: blr
 ;
 ; LINUX64BE-LABEL: rotatemask64:
 ; LINUX64BE: # %bb.0: # %entry
 ; LINUX64BE-NEXT: cntlzd r4, r3
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 1
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
 ; LINUX64BE-NEXT: blr
 ;
 ; LINUX64LE-LABEL: rotatemask64:
 ; LINUX64LE: # %bb.0: # %entry
 ; LINUX64LE-NEXT: cntlzd r4, r3
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 1
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT: blr
 entry:
   %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -126,22 +123,19 @@
 ; AIX64-LABEL: rotatemask64_2:
 ; AIX64: # %bb.0: # %entry
 ; AIX64-NEXT: cntlzd r4, r3
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 1
+; AIX64-NEXT: rldcl r3, r3, r4, 1
 ; AIX64-NEXT: blr
 ;
 ; LINUX64BE-LABEL: rotatemask64_2:
 ; LINUX64BE: # %bb.0: # %entry
 ; LINUX64BE-NEXT: cntlzd r4, r3
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 1
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
 ; LINUX64BE-NEXT: blr
 ;
 ; LINUX64LE-LABEL: rotatemask64_2:
 ; LINUX64LE: # %bb.0: # %entry
 ; LINUX64LE-NEXT: cntlzd r4, r3
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 1
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT: blr
 entry:
   %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
@@ -222,20 +216,17 @@
 ;
 ; AIX64-LABEL: rotatemask64_nocount:
 ; AIX64: # %bb.0: # %entry
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 8
+; AIX64-NEXT: rldcl r3, r3, r4, 8
 ; AIX64-NEXT: blr
 ;
 ; LINUX64BE-LABEL: rotatemask64_nocount:
 ; LINUX64BE: # %bb.0: # %entry
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 8
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 8
 ; LINUX64BE-NEXT: blr
 ;
 ; LINUX64LE-LABEL: rotatemask64_nocount:
 ; LINUX64LE: # %bb.0: # %entry
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 8
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 8
 ; LINUX64LE-NEXT: blr
 entry:
   %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %clz)
@@ -262,20 +253,17 @@
 ;
 ; AIX64-LABEL: builtincheck:
 ; AIX64: # %bb.0: # %entry
-; AIX64-NEXT: rotld r3, r3, r4
-; AIX64-NEXT: clrldi r3, r3, 1
+; AIX64-NEXT: rldcl r3, r3, r4, 1
 ; AIX64-NEXT: blr
 ;
 ; LINUX64BE-LABEL: builtincheck:
 ; LINUX64BE: # %bb.0: # %entry
-; LINUX64BE-NEXT: rotld r3, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r3, 1
+; LINUX64BE-NEXT: rldcl r3, r3, r4, 1
 ; LINUX64BE-NEXT: blr
 ;
 ; LINUX64LE-LABEL: builtincheck:
 ; LINUX64LE: # %bb.0: # %entry
-; LINUX64LE-NEXT: rotld r3, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r3, 1
+; LINUX64LE-NEXT: rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT: blr
 entry:
   %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %shift)
@@ -352,9 +340,9 @@
 ; AIX64-NEXT: stdu r1, -112(r1)
 ; AIX64-NEXT: cntlzd r4, r3
 ; AIX64-NEXT: std r0, 128(r1)
-; AIX64-NEXT: rotld r4, r3, r4
-; AIX64-NEXT: clrldi r3, r4, 1
-; AIX64-NEXT: clrldi r4, r4, 16
+; AIX64-NEXT: rldcl r5, r3, r4, 1
+; AIX64-NEXT: rldcl r4, r3, r4, 16
+; AIX64-NEXT: mr r3, r5
 ; AIX64-NEXT: bl .callee[PR]
 ; AIX64-NEXT: nop
 ; AIX64-NEXT: addi r1, r1, 112
@@ -368,9 +356,9 @@
 ; LINUX64BE-NEXT: stdu r1, -112(r1)
 ; LINUX64BE-NEXT: cntlzd r4, r3
 ; LINUX64BE-NEXT: std r0, 128(r1)
-; LINUX64BE-NEXT: rotld r4, r3, r4
-; LINUX64BE-NEXT: clrldi r3, r4, 1
-; LINUX64BE-NEXT: clrldi r4, r4, 16
+; LINUX64BE-NEXT: rldcl r5, r3, r4, 1
+; LINUX64BE-NEXT: rldcl r4, r3, r4, 16
+; LINUX64BE-NEXT: mr r3, r5
 ; LINUX64BE-NEXT: bl callee
 ; LINUX64BE-NEXT: nop
 ; LINUX64BE-NEXT: addi r1, r1, 112
@@ -384,9 +372,9 @@
 ; LINUX64LE-NEXT: stdu r1, -32(r1)
 ; LINUX64LE-NEXT: cntlzd r4, r3
 ; LINUX64LE-NEXT: std r0, 48(r1)
-; LINUX64LE-NEXT: rotld r4, r3, r4
-; LINUX64LE-NEXT: clrldi r3, r4, 1
-; LINUX64LE-NEXT: clrldi r4, r4, 16
+; LINUX64LE-NEXT: rldcl r5, r3, r4, 1
+; LINUX64LE-NEXT: rldcl r4, r3, r4, 16
+; LINUX64LE-NEXT: mr r3, r5
 ; LINUX64LE-NEXT: bl callee
 ; LINUX64LE-NEXT: nop
 ; LINUX64LE-NEXT: addi r1, r1, 32
@@ -445,8 +433,7 @@
 ; AIX64-NEXT: stdu r1, -112(r1)
 ; AIX64-NEXT: cntlzd r4, r3
 ; AIX64-NEXT: std r0, 128(r1)
-; AIX64-NEXT: rotld r4, r3, r4
-; AIX64-NEXT: clrldi r5, r4, 1
+; AIX64-NEXT: rldcl r5, r3, r4, 1
 ; AIX64-NEXT: rldicl r4, r3, 23, 1
 ; AIX64-NEXT: mr r3, r5
 ; AIX64-NEXT: bl .callee[PR]
@@ -462,8 +449,7 @@
 ; LINUX64BE-NEXT: stdu r1, -112(r1)
 ; LINUX64BE-NEXT: cntlzd r4, r3
 ; LINUX64BE-NEXT: std r0, 128(r1)
-; LINUX64BE-NEXT: rotld r4, r3, r4
-; LINUX64BE-NEXT: clrldi r5, r4, 1
+; LINUX64BE-NEXT: rldcl r5, r3, r4, 1
 ; LINUX64BE-NEXT: rldicl r4, r3, 23, 1
 ; LINUX64BE-NEXT: mr r3, r5
 ; LINUX64BE-NEXT: bl callee
@@ -479,8 +465,7 @@
 ; LINUX64LE-NEXT: stdu r1, -32(r1)
 ; LINUX64LE-NEXT: cntlzd r4, r3
 ; LINUX64LE-NEXT: std r0, 48(r1)
-; LINUX64LE-NEXT: rotld r4, r3, r4
-; LINUX64LE-NEXT: clrldi r5, r4, 1
+; LINUX64LE-NEXT: rldcl r5, r3, r4, 1
 ; LINUX64LE-NEXT: rldicl r4, r3, 23, 1
 ; LINUX64LE-NEXT: mr r3, r5
 ; LINUX64LE-NEXT: bl callee