Index: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
@@ -552,6 +552,7 @@
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
 
   // If the last instruction before MBBI in MBB was some form of COMPARE,
   // try to replace it with a COMPARE AND BRANCH just before MBBI.
Index: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -440,6 +440,10 @@
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
   setTargetDAGCombine(ISD::BSWAP);
+  setTargetDAGCombine(ISD::SHL);
+  setTargetDAGCombine(ISD::SRA);
+  setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::ROTL);
 
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -2874,7 +2878,7 @@
   // Add extra space for alignment if needed.
   if (ExtraAlignSpace)
     NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
-                        DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
 
   // Get the new stack pointer value.
   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
@@ -5069,6 +5073,50 @@
   return SDValue();
 }
 
+SDValue SystemZTargetLowering::combineSHIFTROT(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // Shift/rotate instructions only use the last 6 bits of the second operand
+  // register. If the second operand is the result of an AND with an immediate
+  // value that has its last 6 bits set, we can safely remove the AND operation.
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::AND) {
+    auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+
+    // Only a constant mask can be inspected; otherwise leave the AND alone
+    if (AndMask) {
+      auto AmtVal = AndMask->getZExtValue();
+
+      // All of the bottom 6 bits survive the mask, so the AND is redundant
+      if ((AmtVal & 0x3f) == 0x3f) {
+        SDValue AndOp = N1->getOperand(0);
+
+        // The shift/rotate is the AND's only user, so drop the AND entirely
+        if (N1.hasOneUse()) {
+          // Replace the AND with its unmasked operand
+          DCI.CombineTo(N1.getNode(), AndOp);
+
+          // Return N so it isn't rechecked
+          return SDValue(N, 0);
+
+        // The AND has other users: keep it and rebuild only this shift/rotate
+        } else {
+          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+                                        N->getValueType(0), N->getOperand(0),
+                                        AndOp);
+          DCI.AddToWorklist(Replace.getNode());
+
+          return Replace;
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
@@ -5081,7 +5129,12 @@
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:               return combineSHIFTROT(N, DCI);
   }
+
   return SDValue();
 }
 
Index: llvm/trunk/test/CodeGen/SystemZ/rot-01.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/rot-01.ll
+++ llvm/trunk/test/CodeGen/SystemZ/rot-01.ll
@@ -21,9 +21,9 @@
 ; Test 64-bit rotate.
 define i64 @f2(i64 %val, i64 %amt) {
 ; CHECK-LABEL: f2:
-; CHECK: nill %r3, 63
+; CHECK: nill %r3, 31
 ; CHECK: rllg %r2, %r2, 0(%r3)
-  %mod = urem i64 %amt, 64
+  %mod = urem i64 %amt, 32
 
   %inv = sub i64 64, %mod
   %parta = shl i64 %val, %mod
Index: llvm/trunk/test/CodeGen/SystemZ/rot-02.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/rot-02.ll
+++ llvm/trunk/test/CodeGen/SystemZ/rot-02.ll
@@ -0,0 +1,86 @@
+; Test removal of AND operations that don't affect last 6 bits of rotate amount
+; operand.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test that AND is not removed when some lower 6 bits are not set.
+define i32 @f1(i32 %val, i32 %amt) {
+; CHECK-LABEL: f1:
+; CHECK: nil{{[lf]}} %r3, 31
+; CHECK: rll %r2, %r2, 0(%r3)
+  %and = and i32 %amt, 31
+
+  %inv = sub i32 32, %and
+  %parta = shl i32 %val, %and
+  %partb = lshr i32 %val, %inv
+
+  %rotl = or i32 %parta, %partb
+
+  ret i32 %rotl
+}
+
+; Test removal of AND mask with only bottom 6 bits set.
+define i32 @f2(i32 %val, i32 %amt) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: rll %r2, %r2, 0(%r3)
+  %and = and i32 %amt, 63
+
+  %inv = sub i32 32, %and
+  %parta = shl i32 %val, %and
+  %partb = lshr i32 %val, %inv
+
+  %rotl = or i32 %parta, %partb
+
+  ret i32 %rotl
+}
+
+; Test removal of AND mask including but not limited to bottom 6 bits.
+define i32 @f3(i32 %val, i32 %amt) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: nil{{[lf]}} %r3, 255
+; CHECK: rll %r2, %r2, 0(%r3)
+  %and = and i32 %amt, 255
+
+  %inv = sub i32 32, %and
+  %parta = shl i32 %val, %and
+  %partb = lshr i32 %val, %inv
+
+  %rotl = or i32 %parta, %partb
+
+  ret i32 %rotl
+}
+
+; Test removal of AND mask from RLLG.
+define i64 @f4(i64 %val, i64 %amt) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: rllg %r2, %r2, 0(%r3)
+  %and = and i64 %amt, 63
+
+  %inv = sub i64 64, %and
+  %parta = shl i64 %val, %and
+  %partb = lshr i64 %val, %inv
+
+  %rotl = or i64 %parta, %partb
+
+  ret i64 %rotl
+}
+
+; Test that a reused AND is kept for its other user but bypassed by the rotate.
+define i32 @f5(i32 %val, i32 %amt) {
+; CHECK-LABEL: f5:
+; CHECK: rll %r2, %r2, 0(%r3)
+; CHECK: nil{{[lf]}} %r3, 63
+; CHECK: ar %r2, %r3
+  %and = and i32 %amt, 63
+
+  %inv = sub i32 32, %and
+  %parta = shl i32 %val, %and
+  %partb = lshr i32 %val, %inv
+
+  %rotl = or i32 %parta, %partb
+
+  %reuse = add i32 %and, %rotl
+  ret i32 %reuse
+}
Index: llvm/trunk/test/CodeGen/SystemZ/shift-12.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/shift-12.ll
+++ llvm/trunk/test/CodeGen/SystemZ/shift-12.ll
@@ -0,0 +1,106 @@
+; Test removal of AND operations that leave the low 6 bits of the shift amount
+; operand unchanged.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test that the AND is kept when its mask clears some of the low 6 bits.
+define i32 @f1(i32 %a, i32 %sh) {
+; CHECK-LABEL: f1:
+; CHECK: nil{{[lf]}} %r3, 31
+; CHECK: sll %r2, 0(%r3)
+  %and = and i32 %sh, 31
+  %shift = shl i32 %a, %and
+  ret i32 %shift
+}
+
+; Test removal of an AND whose mask is exactly the low 6 bits (63).
+define i32 @f2(i32 %a, i32 %sh) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: sll %r2, 0(%r3)
+  %and = and i32 %sh, 63
+  %shift = shl i32 %a, %and
+  ret i32 %shift
+}
+
+; Test removal of an AND whose mask covers the low 6 bits and more (255).
+define i32 @f3(i32 %a, i32 %sh) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: nil{{[lf]}} %r3, 255
+; CHECK: sll %r2, 0(%r3)
+  %and = and i32 %sh, 255
+  %shift = shl i32 %a, %and
+  ret i32 %shift
+}
+
+; Test removal of AND mask from SRA.
+define i32 @f4(i32 %a, i32 %sh) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: sra %r2, 0(%r3)
+  %and = and i32 %sh, 63
+  %shift = ashr i32 %a, %and
+  ret i32 %shift
+}
+
+; Test removal of AND mask from SRL.
+define i32 @f5(i32 %a, i32 %sh) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: srl %r2, 0(%r3)
+  %and = and i32 %sh, 63
+  %shift = lshr i32 %a, %and
+  ret i32 %shift
+}
+
+; Test removal of AND mask from SLLG.
+define i64 @f6(i64 %a, i64 %sh) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: sllg %r2, %r2, 0(%r3)
+  %and = and i64 %sh, 63
+  %shift = shl i64 %a, %and
+  ret i64 %shift
+}
+
+; Test removal of AND mask from SRAG.
+define i64 @f7(i64 %a, i64 %sh) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: srag %r2, %r2, 0(%r3)
+  %and = and i64 %sh, 63
+  %shift = ashr i64 %a, %and
+  ret i64 %shift
+}
+
+; Test removal of AND mask from SRLG.
+define i64 @f8(i64 %a, i64 %sh) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: nil{{[lf]}} %r3, 63
+; CHECK: srlg %r2, %r2, 0(%r3)
+  %and = and i64 %sh, 63
+  %shift = lshr i64 %a, %and
+  ret i64 %shift
+}
+
+; Test that an AND whose mask is a register, not an immediate, is left alone.
+define i32 @f9(i32 %a, i32 %b, i32 %sh) {
+; CHECK-LABEL: f9:
+; CHECK: nr %r3, %r4
+; CHECK: sll %r2, 0(%r3)
+  %and = and i32 %sh, %b
+  %shift = shl i32 %a, %and
+  ret i32 %shift
+}
+
+; Test that a reused AND is kept for its other user but bypassed by the shift.
+define i32 @f10(i32 %a, i32 %sh) {
+; CHECK-LABEL: f10:
+; CHECK: sll %r2, 0(%r3)
+; CHECK: nil{{[lf]}} %r3, 63
+; CHECK: ar %r2, %r3
+  %and = and i32 %sh, 63
+  %shift = shl i32 %a, %and
+  %reuse = add i32 %and, %shift
+  ret i32 %reuse
+}