Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -602,7 +602,6 @@
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
-  SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -527,10 +527,6 @@
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
   setTargetDAGCombine(ISD::BSWAP);
-  setTargetDAGCombine(ISD::SHL);
-  setTargetDAGCombine(ISD::SRA);
-  setTargetDAGCombine(ISD::SRL);
-  setTargetDAGCombine(ISD::ROTL);
 
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -5524,76 +5520,6 @@
   return SDValue();
 }
 
-SDValue SystemZTargetLowering::combineSHIFTROT(
-    SDNode *N, DAGCombinerInfo &DCI) const {
-
-  SelectionDAG &DAG = DCI.DAG;
-
-  // Shift/rotate instructions only use the last 6 bits of the second operand
-  // register. If the second operand is the result of an AND with an immediate
-  // value that has its last 6 bits set, we can safely remove the AND operation.
-  //
-  // If the AND operation doesn't have the last 6 bits set, we can't remove it
-  // entirely, but we can still truncate it to a 16-bit value. This prevents
-  // us from ending up with a NILL with a signed operand, which will cause the
-  // instruction printer to abort.
-  SDValue N1 = N->getOperand(1);
-  if (N1.getOpcode() == ISD::AND) {
-    SDValue AndMaskOp = N1->getOperand(1);
-    auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
-    // The AND mask is constant
-    if (AndMask) {
-      auto AmtVal = AndMask->getZExtValue();
-
-      // Bottom 6 bits are set
-      if ((AmtVal & 0x3f) == 0x3f) {
-        SDValue AndOp = N1->getOperand(0);
-
-        // This is the only use, so remove the node
-        if (N1.hasOneUse()) {
-          // Combine the AND away
-          DCI.CombineTo(N1.getNode(), AndOp);
-
-          // Return N so it isn't rechecked
-          return SDValue(N, 0);
-
-        // The node will be reused, so create a new node for this one use
-        } else {
-          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                        N->getValueType(0), N->getOperand(0),
-                                        AndOp);
-          DCI.AddToWorklist(Replace.getNode());
-
-          return Replace;
-        }
-
-      // We can't remove the AND, but we can use NILL here (normally we would
-      // use NILF). Only keep the last 16 bits of the mask. The actual
-      // transformation will be handled by .td definitions.
-      } else if (AmtVal >> 16 != 0) {
-        SDValue AndOp = N1->getOperand(0);
-
-        auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
-                                       SDLoc(AndMaskOp),
-                                       AndMaskOp.getValueType());
-
-        auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
-                                  AndOp, NewMask);
-
-        SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                      N->getValueType(0), N->getOperand(0),
-                                      NewAnd);
-        DCI.AddToWorklist(Replace.getNode());
-
-        return Replace;
-      }
-    }
-  }
-
-  return SDValue();
-}
-
 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
   // set by the CCReg instruction using the CCValid / CCMask masks,
@@ -5752,10 +5678,6 @@
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
   case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
   case ISD::BSWAP: return combineBSWAP(N, DCI);
-  case ISD::SHL:
-  case ISD::SRA:
-  case ISD::SRL:
-  case ISD::ROTL: return combineSHIFTROT(N, DCI);
   case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
   case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
   case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
Index: lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.td
+++ lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1352,8 +1352,8 @@
 //===----------------------------------------------------------------------===//
 
 // Logical shift left.
-defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
-def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>;
+def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>;
 def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
 
 // Arithmetic shift left.
@@ -1364,20 +1364,20 @@
 }
 
 // Logical shift right.
-defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
-def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>;
+def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>;
 def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
 
 // Arithmetic shift right.
 let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
-  defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
-  def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+  defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>;
+  def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>;
   def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
 }
 
 // Rotate left.
-def RLL  : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
-def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def RLL  : BinaryRSY<"rll", 0xEB1D, shiftop<rotl>, GR32>;
+def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>;
 
 // Rotate second operand left and inserted selected bits into first operand.
 // These can act like 32-bit operands provided that the constant start and
@@ -2162,29 +2162,29 @@
 // Complexity is added so that we match this before we match NILF on the AND
 // operation alone.
 let AddedComplexity = 4 in {
-  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 }
 
 // Peepholes for turning scalar operations into block operations.
Index: lib/Target/SystemZ/SystemZOperands.td
===================================================================
--- lib/Target/SystemZ/SystemZOperands.td
+++ lib/Target/SystemZ/SystemZOperands.td
@@ -357,6 +357,7 @@
 }], UIMM16, "U16Imm">;
 
 def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
 
 // Full 32-bit immediates. we need both signed and unsigned versions
 // because the assembler is picky. E.g. AFI requires signed operands
Index: lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- lib/Target/SystemZ/SystemZOperators.td
+++ lib/Target/SystemZ/SystemZOperators.td
@@ -697,6 +697,16 @@
   : PatFrag<(ops node:$addr),
             (store (operator), node:$addr)>;
 
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 6 bits are all set.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+class shiftop<SDPatternOperator operator>
+  : PatFrags<(ops node:$val, node:$count),
+             [(operator node:$val, node:$count),
+              (operator node:$val, (and node:$count, imm32bottom6set))]>;
+
 // Vector representation of all-zeros and all-ones.
 def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
 def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
Index: test/CodeGen/SystemZ/shift-12.ll
===================================================================
--- test/CodeGen/SystemZ/shift-12.ll
+++ test/CodeGen/SystemZ/shift-12.ll
@@ -104,3 +104,15 @@
   %reuse = add i32 %and, %shift
   ret i32 %reuse
 }
+
+; Test that AND is not removed for i128 (which calls __ashlti3)
+define i128 @f11(i128 %a, i32 %sh) {
+; CHECK-LABEL: f11:
+; CHECK: risbg %r4, %r4, 57, 191, 0
+; CHECK: brasl %r14, __ashlti3@PLT
+  %and = and i32 %sh, 127
+  %ext = zext i32 %and to i128
+  %shift = shl i128 %a, %ext
+  ret i128 %shift
+}
+
Index: utils/TableGen/CodeGenDAGPatterns.cpp
===================================================================
--- utils/TableGen/CodeGenDAGPatterns.cpp
+++ utils/TableGen/CodeGenDAGPatterns.cpp
@@ -3946,6 +3946,24 @@
   return false;
 }
 
+// Promote xform function to be an explicit node wherever set.
+static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) {
+  if (Record *Xform = N->getTransformFn()) {
+    N->setTransformFn(nullptr);
+    std::vector<TreePatternNodePtr> Children;
+    Children.push_back(PromoteXForms(N));
+    return std::make_shared<TreePatternNode>(Xform, std::move(Children),
+                                             N->getNumTypes());
+  }
+
+  if (!N->isLeaf())
+    for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
+      TreePatternNodePtr Child = N->getChildShared(i);
+      N->setChild(i, std::move(PromoteXForms(Child)));
+    }
+  return N;
+}
+
 void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef,
        TreePattern &Pattern, TreePattern &Result,
        const std::vector<Record *> &InstImpResults) {
@@ -4011,30 +4029,8 @@
     Result.error("Could not infer all types in pattern result!");
   }
 
-  // Promote the xform function to be an explicit node if set.
-  const TreePatternNodePtr &DstPattern = Result.getOnlyTree();
-  std::vector<TreePatternNodePtr> ResultNodeOperands;
-  for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
-    TreePatternNodePtr OpNode = DstPattern->getChildShared(ii);
-    if (Record *Xform = OpNode->getTransformFn()) {
-      OpNode->setTransformFn(nullptr);
-      std::vector<TreePatternNodePtr> Children;
-      Children.push_back(OpNode);
-      OpNode = std::make_shared<TreePatternNode>(Xform, std::move(Children),
-                                                 OpNode->getNumTypes());
-    }
-    ResultNodeOperands.push_back(OpNode);
-  }
-
-  TreePatternNodePtr DstShared =
-      DstPattern->isLeaf()
-          ? DstPattern
-          : std::make_shared<TreePatternNode>(DstPattern->getOperator(),
-                                              std::move(ResultNodeOperands),
-                                              DstPattern->getNumTypes());
-
-  for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
-    DstShared->setType(i, Result.getOnlyTree()->getExtType(i));
+  // Promote xform function to be an explicit node wherever set.
+  TreePatternNodePtr DstShared = PromoteXForms(Result.getOnlyTree()->clone());
 
   TreePattern Temp(Result.getRecord(), DstShared, false, *this);
   Temp.InferAllTypes();
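
For illustration only, not part of the patch: the kind of input the new shiftop PatFrags is meant to match is the same as in the existing tests of test/CodeGen/SystemZ/shift-12.ll. A minimal sketch in that test's style follows; the function name and the exact CHECK lines are illustrative assumptions, not copied from the test file.

; Hypothetical example modeled on shift-12.ll: the AND mask 63 has its bottom
; 6 bits set, so shiftop<shl> matches (shl ... (and ... imm32bottom6set)) and
; SLL is selected with the unmasked count, with no NILL/NILF emitted.
; Register assignments assume the standard SystemZ calling convention.
define i32 @sketch(i32 %a, i32 %sh) {
; CHECK-LABEL: sketch:
; CHECK-NOT: nil
; CHECK: sll %r2, 0(%r3)
  %and = and i32 %sh, 63
  %shift = shl i32 %a, %and
  ret i32 %shift
}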