Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -330,6 +330,9 @@ // to X. bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + // Try to expand a boolean SELECT_CCMASK using an IPM sequence. + SDValue expandSelectBoolean(SDNode *Node); + public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel) {} @@ -348,6 +351,7 @@ void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; + void PreprocessISelDAG() override; // Include the pieces autogenerated from the target description. #include "SystemZGenDAGISel.inc" @@ -1438,3 +1442,183 @@ return true; } + +namespace { +// Represents a sequence for extracting a 0/1 value from an IPM result: +// (((X ^ XORValue) + AddValue) >> Bit) +struct IPMConversion { + IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) + : XORValue(xorValue), AddValue(addValue), Bit(bit) {} + + int64_t XORValue; + int64_t AddValue; + unsigned Bit; +}; +} // end anonymous namespace + +// Return a sequence for getting a 1 from an IPM result when CC has a +// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. +// The handling of CC values outside CCValid doesn't matter. +static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { + // Deal with cases where the result can be taken directly from a bit + // of the IPM result. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC); + if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC + 1); + + // Deal with cases where we can add a value to force the sign bit + // to contain the right value. 
Putting the bit in 31 means we can + // use SRL rather than RISBG(L), and also makes it easier to get a + // 0/-1 value, so it has priority over the other tests below. + // + // These sequences rely on the fact that the upper two bits of the + // IPM result are zero. + uint64_t TopBit = uint64_t(1) << 31; + if (CCMask == (CCValid & SystemZ::CCMASK_0)) + return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) + return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_2))) + return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_3)) + return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_1 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); + + // Next try inverting the value and testing a bit. 0/1 could be + // handled this way too, but we dealt with that case above. + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) + return IPMConversion(-1, 0, SystemZ::IPM_CC); + + // Handle cases where adding a value forces a non-sign bit to contain + // the right value. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) + return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) + return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); + + // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All of these + // can be done by inverting the low CC bit and applying one of the + // sign-based extractions above. 
+ if (CCMask == (CCValid & SystemZ::CCMASK_1)) + return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_2)) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (1 << SystemZ::IPM_CC), 31); + + llvm_unreachable("Unexpected CC combination"); +} + +SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { + auto *TrueOp = dyn_cast(Node->getOperand(0)); + auto *FalseOp = dyn_cast(Node->getOperand(1)); + if (!TrueOp || !FalseOp) + return SDValue(); + if (FalseOp->getZExtValue() != 0) + return SDValue(); + if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1) + return SDValue(); + + auto *CCValidOp = dyn_cast(Node->getOperand(2)); + auto *CCMaskOp = dyn_cast(Node->getOperand(3)); + if (!CCValidOp || !CCMaskOp) + return SDValue(); + int CCValid = CCValidOp->getZExtValue(); + int CCMask = CCMaskOp->getZExtValue(); + + SDLoc DL(Node); + SDValue Glue = Node->getOperand(4); + IPMConversion Conversion = getIPMConversion(CCValid, CCMask); + SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + + if (Conversion.XORValue) { + SDValue XORValue = CurDAG->getConstant(Conversion.XORValue, DL, MVT::i32); + Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result, XORValue); + } + + if (Conversion.AddValue) { + SDValue AddValue = CurDAG->getConstant(Conversion.AddValue, DL, MVT::i32); + Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, AddValue); + } + + if (Node->getValueType(0) == MVT::i32 && Conversion.Bit == 31) { + unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? 
ISD::SRL : ISD::SRA; + SDValue Bit = CurDAG->getConstant(Conversion.Bit, DL, MVT::i32); + Result = CurDAG->getNode(ShiftOp, DL, MVT::i32, Result, Bit); + } else { + EVT VT = Node->getValueType(0); + if (VT != MVT::i32) + Result = CurDAG->getNode(ISD::ANY_EXTEND, DL, VT, Result); + + if (TrueOp->getSExtValue() == 1) { + // The SHR/AND sequence should get optimized to an RISBG. + SDValue Bit = CurDAG->getConstant(Conversion.Bit, DL, MVT::i32); + Result = CurDAG->getNode(ISD::SRL, DL, VT, Result, Bit); + SDValue One = CurDAG->getConstant(1, DL, VT); + Result = CurDAG->getNode(ISD::AND, DL, VT, Result, One); + } else { + int Size = VT.getSizeInBits(); + SDValue SizeMBit + = CurDAG->getConstant(Size - 1 - Conversion.Bit, DL, MVT::i32); + Result = CurDAG->getNode(ISD::SHL, DL, VT, Result, SizeMBit); + SDValue SizeMOne = CurDAG->getConstant(Size - 1, DL, MVT::i32); + Result = CurDAG->getNode(ISD::SRA, DL, VT, Result, SizeMOne); + } + } + + return Result; +} + +void SystemZDAGToDAGISel::PreprocessISelDAG() { + // If we have conditional immediate loads, we always prefer + // using those over an IPM sequence. 
+ if (Subtarget->hasLoadStoreOnCond2()) + return; + + bool MadeChange = false; + + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); + I != E;) { + SDNode *N = &*I++; + if (N->use_empty()) + continue; + + SDValue Res; + switch (N->getOpcode()) { + default: break; + case SystemZISD::SELECT_CCMASK: + Res = expandSelectBoolean(N); + break; + } + + if (Res) { + DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(Res.getNode()->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + MadeChange = true; + } + } + + if (MadeChange) + CurDAG->RemoveDeadNodes(); +} + Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -490,6 +490,14 @@ SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + /// Determine which bits of Op are known to be either zero or one and + /// return them in Known. 
+ void computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + ISD::NodeType getExtendForAtomicOps() const override { return ISD::ANY_EXTEND; } @@ -563,7 +571,9 @@ bool Force) const; SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const; + SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; @@ -571,6 +581,8 @@ SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -31,17 +31,6 @@ #define DEBUG_TYPE "systemz-lower" namespace { -// Represents a sequence for extracting a 0/1 value from an IPM result: -// (((X ^ XORValue) + AddValue) >> Bit) -struct IPMConversion { - IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) - : XORValue(xorValue), AddValue(addValue), Bit(bit) {} - - int64_t XORValue; - int64_t AddValue; - unsigned Bit; -}; - // Represents information about a comparison. 
struct Comparison { Comparison(SDValue Op0In, SDValue Op1In) @@ -517,7 +506,9 @@ setOperationAction(ISD::VAEND, MVT::Other, Expand); // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); @@ -1699,73 +1690,6 @@ #undef CONV } -// Return a sequence for getting a 1 from an IPM result when CC has a -// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. -// The handling of CC values outside CCValid doesn't matter. -static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { - // Deal with cases where the result can be taken directly from a bit - // of the IPM result. - if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) - return IPMConversion(0, 0, SystemZ::IPM_CC); - if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) - return IPMConversion(0, 0, SystemZ::IPM_CC + 1); - - // Deal with cases where we can add a value to force the sign bit - // to contain the right value. Putting the bit in 31 means we can - // use SRL rather than RISBG(L), and also makes it easier to get a - // 0/-1 value, so it has priority over the other tests below. - // - // These sequences rely on the fact that the upper two bits of the - // IPM result are zero. 
- uint64_t TopBit = uint64_t(1) << 31; - if (CCMask == (CCValid & SystemZ::CCMASK_0)) - return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) - return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_1 - | SystemZ::CCMASK_2))) - return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & SystemZ::CCMASK_3)) - return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_1 - | SystemZ::CCMASK_2 - | SystemZ::CCMASK_3))) - return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); - - // Next try inverting the value and testing a bit. 0/1 could be - // handled this way too, but we dealt with that case above. - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) - return IPMConversion(-1, 0, SystemZ::IPM_CC); - - // Handle cases where adding a value forces a non-sign bit to contain - // the right value. - if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) - return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) - return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); - - // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are - // can be done by inverting the low CC bit and applying one of the - // sign-based extractions above. 
- if (CCMask == (CCValid & SystemZ::CCMASK_1)) - return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & SystemZ::CCMASK_2)) - return IPMConversion(1 << SystemZ::IPM_CC, - TopBit - (3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_1 - | SystemZ::CCMASK_3))) - return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_2 - | SystemZ::CCMASK_3))) - return IPMConversion(1 << SystemZ::IPM_CC, - TopBit - (1 << SystemZ::IPM_CC), 31); - - llvm_unreachable("Unexpected CC combination"); -} - // If C can be converted to a comparison against zero, adjust the operands // as necessary. static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { @@ -2237,6 +2161,24 @@ C.CCMask = NewCCMask; } +// See whether the comparison argument contains a redundant AND +// and remove it if so. This sometimes happens due to the generic +// BRCOND expansion. +static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.Op0.getOpcode() != ISD::AND) + return; + auto *Mask = dyn_cast(C.Op0.getOperand(1)); + if (!Mask) + return; + KnownBits Known; + DAG.computeKnownBits(C.Op0.getOperand(0), Known); + if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) + return; + + C.Op0 = C.Op0.getOperand(0); +} + // Return a Comparison that tests the condition-code result of intrinsic // node Call against constant integer CC using comparison code Cond. // Opcode is the opcode of the SystemZISD operation for the intrinsic @@ -2311,6 +2253,7 @@ else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustForRedundantAnd(DAG, DL, C); adjustZeroCmp(DAG, DL, C); adjustSubwordCmp(DAG, DL, C); adjustForSubtraction(DAG, DL, C); @@ -2388,24 +2331,11 @@ // in CCValid, so other values can be ignored. 
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue, unsigned CCValid, unsigned CCMask) { - IPMConversion Conversion = getIPMConversion(CCValid, CCMask); - SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - - if (Conversion.XORValue) - Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); - - if (Conversion.AddValue) - Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); - - // The SHR/AND sequence should get optimized to an RISBG. - Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, DL, MVT::i32)); - if (Conversion.Bit != 31) - Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, DL, MVT::i32)); - return Result; + SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(CCValid, DL, MVT::i32), + DAG.getConstant(CCMask, DL, MVT::i32), Glue }; + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot @@ -2620,35 +2550,10 @@ } SDValue Glue = emitCmp(DAG, DL, C); - - // Special case for handling -1/0 results. The shifts we use here - // should get optimized with the IPM conversion sequence. - auto *TrueC = dyn_cast(TrueOp); - auto *FalseC = dyn_cast(FalseOp); - if (TrueC && FalseC) { - int64_t TrueVal = TrueC->getSExtValue(); - int64_t FalseVal = FalseC->getSExtValue(); - if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) { - // Invert the condition if we want -1 on false. - if (TrueVal == 0) - C.CCMask ^= C.CCValid; - SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); - EVT VT = Op.getValueType(); - // Extend the result to VT. Upper bits are ignored. - if (!is32Bit(VT)) - Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); - // Sign-extend from the low bit. 
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); - return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); - } - } - SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, @@ -5174,6 +5079,54 @@ return SDValue(); } +SDValue SystemZTargetLowering::combineZERO_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { + auto *TrueOp = dyn_cast(N0.getOperand(0)); + auto *FalseOp = dyn_cast(N0.getOperand(1)); + if (TrueOp && FalseOp) { + SDLoc DL(N0); + SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), + DAG.getConstant(FalseOp->getZExtValue(), DL, VT), + N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; + SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); + // If N0 has multiple uses, change other uses as well. 
+ if (!N0.hasOneUse()) { + SDValue TruncSelect = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); + DCI.CombineTo(N0.getNode(), TruncSelect); + } + return NewSelect; + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) + // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) + // into (select_cc LHS, RHS, -1, 0, COND) + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT EVT = cast(N->getOperand(1))->getVT(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) + N0 = N0.getOperand(0); + if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { + SDLoc DL(N0); + SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + return SDValue(); +} + SDValue SystemZTargetLowering::combineSIGN_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext (ashr (shl X, C1), C2)) to @@ -5475,11 +5428,116 @@ return SDValue(); } +static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) { + // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code + // set by the glued instruction using the CCValid / CCMask masks. + // If the glued instruction is itself a (ICMP (SELECT_CCMASK)) testing + // the condition code set by some other instruction, see whether we + // can directly use that condition code. + bool Invert = false; + + // Verify that we have an appropriate mask for an EQ or NE comparison. + if (CCValid != SystemZ::CCMASK_ICMP) + return false; + if (CCMask == SystemZ::CCMASK_CMP_NE) + Invert = !Invert; + else if (CCMask != SystemZ::CCMASK_CMP_EQ) + return false; + + // Verify that we have an ICMP that is the single user of a SELECT_CCMASK. 
+ SDNode *ICmp = Glue.getNode(); + if (ICmp->getOpcode() != SystemZISD::ICMP) + return false; + SDNode *Select = ICmp->getOperand(0).getNode(); + if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) + return false; + if (!Select->hasOneUse()) + return false; + + // Verify that the ICMP compares against one of select values. + auto *CompareVal = dyn_cast(ICmp->getOperand(1)); + if (!CompareVal) + return false; + auto *TrueVal = dyn_cast(Select->getOperand(0)); + if (!TrueVal) + return false; + auto *FalseVal = dyn_cast(Select->getOperand(1)); + if (!FalseVal) + return false; + if (CompareVal->getZExtValue() == FalseVal->getZExtValue()) + Invert = !Invert; + else if (CompareVal->getZExtValue() != TrueVal->getZExtValue()) + return false; + + // Compute the effective CC mask for the new branch or select. + auto *NewCCValid = dyn_cast(Select->getOperand(2)); + auto *NewCCMask = dyn_cast(Select->getOperand(3)); + if (!NewCCValid || !NewCCMask) + return false; + CCValid = NewCCValid->getZExtValue(); + CCMask = NewCCMask->getZExtValue(); + if (Invert) + CCMask ^= CCValid; + + // Return the updated Glue link. + Glue = Select->getOperand(4); + return true; +} + +SDValue SystemZTargetLowering::combineBR_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 
+ auto *CCValid = dyn_cast(N->getOperand(1)); + auto *CCMask = dyn_cast(N->getOperand(2)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue Glue = N->getOperand(4); + + if (combineCCMask(Glue, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), + N->getOperand(0), + DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(3), Glue); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSELECT_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. + auto *CCValid = dyn_cast(N->getOperand(2)); + auto *CCMask = dyn_cast(N->getOperand(3)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue Glue = N->getOperand(4); + + if (combineCCMask(Glue, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), + N->getOperand(0), + N->getOperand(1), + DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + Glue); + return SDValue(); +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { default: break; + case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); + case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); case SystemZISD::MERGE_HIGH: case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); @@ -5491,11 +5549,37 @@ case ISD::SRA: case ISD::SRL: case ISD::ROTL: return combineSHIFTROT(N, DCI); + case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); + case SystemZISD::SELECT_CCMASK: return 
combineSELECT_CCMASK(N, DCI); } return SDValue(); } +void +SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + unsigned BitWidth = Known.getBitWidth(); + + Known.resetAll(); + switch (Op.getOpcode()) { + case SystemZISD::SELECT_CCMASK: { + KnownBits TrueKnown(BitWidth), FalseKnown(BitWidth); + DAG.computeKnownBits(Op.getOperand(0), TrueKnown, Depth + 1); + DAG.computeKnownBits(Op.getOperand(1), FalseKnown, Depth + 1); + Known.Zero = TrueKnown.Zero & FalseKnown.Zero; + Known.One = TrueKnown.One & FalseKnown.One; + break; + } + + default: + break; + } +} + //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -2131,18 +2131,6 @@ def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; -// Optimize sign-extended 1/0 selects to -1/0 selects. This is important -// for vector legalization. -def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)), - (i32 31)), - (i32 31)), - (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>; -def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, - imm32zx4:$cc)))), - (i32 63)), - (i32 63)), - (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>; - // Avoid generating 2 XOR instructions. 
(xor (and x, y), y) is // equivalent to (and (xor x, -1), y) def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), Index: test/CodeGen/SystemZ/branch-07.ll =================================================================== --- test/CodeGen/SystemZ/branch-07.ll +++ test/CodeGen/SystemZ/branch-07.ll @@ -129,8 +129,8 @@ ; CHECK-LABEL: f9: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: afi [[REG]], -268435456 -; CHECK: risbg [[REG2:%r[0-5]]], [[REG]], 63, 191, 33 -; CHECK: lcgr {{%r[0-5]}}, [[REG2]] +; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32 +; CHECK: srag {{%r[0-5]}}, [[REG2]], 63 ; CHECK: br %r14 %avec = bitcast i64 %a to <2 x i32> %bvec = bitcast i64 %b to <2 x i32> @@ -145,8 +145,8 @@ ; CHECK-LABEL: f10: ; CHECK: ipm [[REG:%r[0-5]]] ; CHECK: afi [[REG]], 1879048192 -; CHECK: risbg [[REG2:%r[0-5]]], [[REG]], 63, 191, 33 -; CHECK: lcgr {{%r[0-5]}}, [[REG2]] +; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32 +; CHECK: srag {{%r[0-5]}}, [[REG2]], 63 ; CHECK: br %r14 %avec = bitcast i64 %a to <2 x i32> %bvec = bitcast i64 %b to <2 x i32> Index: test/CodeGen/SystemZ/cmpxchg-01.ll =================================================================== --- test/CodeGen/SystemZ/cmpxchg-01.ll +++ test/CodeGen/SystemZ/cmpxchg-01.ll @@ -89,3 +89,83 @@ ret i32 %res } + +declare void @g() + +; Check using the comparison result for a branch. 
+; CHECK-LABEL: f4 +; CHECK-MAIN-LABEL: f4: +; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}} +; CHECK-MAIN-DAG: sll %r2, 3 +; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]]) +; CHECK-MAIN: [[LOOP:\.[^ ]*]]: +; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 8(%r2) +; CHECK-MAIN: risbg %r3, [[TMP]], 32, 55, 0 +; CHECK-MAIN: cr [[TMP]], %r3 +; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: risbg %r4, [[TMP]], 32, 55, 0 +; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -8({{%r[1-9]+}}) +; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]]) +; CHECK-MAIN: jl [[LOOP]] +; CHECK-MAIN: [[EXIT]]: +; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]] +; CHECK-MAIN: jg g +; CHECK-MAIN: [[LABEL]]: +; CHECK-MAIN: br %r14 +; +; CHECK-SHIFT-LABEL: f4: +; CHECK-SHIFT: sll %r2, 3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2 +; CHECK-SHIFT: rll +; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -8([[NEGSHIFT]]) +define void @f4(i8 *%src, i8 %cmp, i8 %swap) { + %pair = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst + %cond = extractvalue { i8, i1 } %pair, 1 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+; CHECK-MAIN-LABEL: f5: +; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}} +; CHECK-MAIN-DAG: sll %r2, 3 +; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]]) +; CHECK-MAIN: [[LOOP:\.[^ ]*]]: +; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 8(%r2) +; CHECK-MAIN: risbg %r3, [[TMP]], 32, 55, 0 +; CHECK-MAIN: cr [[TMP]], %r3 +; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: risbg %r4, [[TMP]], 32, 55, 0 +; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -8({{%r[1-9]+}}) +; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]]) +; CHECK-MAIN: jl [[LOOP]] +; CHECK-MAIN: [[EXIT]]: +; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]] +; CHECK-MAIN: br %r14 +; CHECK-MAIN: [[LABEL]]: +; CHECK-MAIN: jg g +; +; CHECK-SHIFT-LABEL: f5: +; CHECK-SHIFT: sll %r2, 3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2 +; CHECK-SHIFT: rll +; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -8([[NEGSHIFT]]) +define void @f5(i8 *%src, i8 %cmp, i8 %swap) { + %pair = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst + %cond = extractvalue { i8, i1 } %pair, 1 + br i1 %cond, label %exit, label %call + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + Index: test/CodeGen/SystemZ/cmpxchg-02.ll =================================================================== --- test/CodeGen/SystemZ/cmpxchg-02.ll +++ test/CodeGen/SystemZ/cmpxchg-02.ll @@ -89,3 +89,82 @@ ret i32 %res } +declare void @g() + +; Check using the comparison result for a branch. 
+; CHECK-LABEL: f4 +; CHECK-MAIN-LABEL: f4: +; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}} +; CHECK-MAIN-DAG: sll %r2, 3 +; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]]) +; CHECK-MAIN: [[LOOP:\.[^ ]*]]: +; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 16(%r2) +; CHECK-MAIN: risbg %r3, [[TMP]], 32, 47, 0 +; CHECK-MAIN: cr [[TMP]], %r3 +; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: risbg %r4, [[TMP]], 32, 47, 0 +; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -16({{%r[1-9]+}}) +; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]]) +; CHECK-MAIN: jl [[LOOP]] +; CHECK-MAIN: [[EXIT]]: +; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]] +; CHECK-MAIN: jg g +; CHECK-MAIN: [[LABEL]]: +; CHECK-MAIN: br %r14 +; +; CHECK-SHIFT-LABEL: f4: +; CHECK-SHIFT: sll %r2, 3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2 +; CHECK-SHIFT: rll +; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -16([[NEGSHIFT]]) +define void @f4(i16 *%src, i16 %cmp, i16 %swap) { + %pair = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst + %cond = extractvalue { i16, i1 } %pair, 1 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+; CHECK-MAIN-LABEL: f5: +; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}} +; CHECK-MAIN-DAG: sll %r2, 3 +; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]]) +; CHECK-MAIN: [[LOOP:\.[^ ]*]]: +; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 16(%r2) +; CHECK-MAIN: risbg %r3, [[TMP]], 32, 47, 0 +; CHECK-MAIN: cr [[TMP]], %r3 +; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]] +; CHECK-MAIN: risbg %r4, [[TMP]], 32, 47, 0 +; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -16({{%r[1-9]+}}) +; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]]) +; CHECK-MAIN: jl [[LOOP]] +; CHECK-MAIN: [[EXIT]]: +; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]] +; CHECK-MAIN: br %r14 +; CHECK-MAIN: [[LABEL]]: +; CHECK-MAIN: jg g +; +; CHECK-SHIFT-LABEL: f5: +; CHECK-SHIFT: sll %r2, 3 +; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2 +; CHECK-SHIFT: rll +; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -16([[NEGSHIFT]]) +define void @f5(i16 *%src, i16 %cmp, i16 %swap) { + %pair = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst + %cond = extractvalue { i16, i1 } %pair, 1 + br i1 %cond, label %exit, label %call + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + Index: test/CodeGen/SystemZ/cmpxchg-03.ll =================================================================== --- test/CodeGen/SystemZ/cmpxchg-03.ll +++ test/CodeGen/SystemZ/cmpxchg-03.ll @@ -155,3 +155,42 @@ %res = zext i1 %val to i32 ret i32 %res } + +declare void @g() + +; Check using the comparison result for a branch. +; CHECK-LABEL: f14 +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK-NEXT: jge g +; CHECK: br %r14 +define void @f14(i32 %cmp, i32 %swap, i32 *%src) { + %pairval = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst + %cond = extractvalue { i32, i1 } %pairval, 1 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+; CHECK-LABEL: f15 +; CHECK: cs %r2, %r3, 0(%r4) +; CHECK-NEXT: jgl g +; CHECK: br %r14 +define void @f15(i32 %cmp, i32 %swap, i32 *%src) { + %pairval = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst + %cond = extractvalue { i32, i1 } %pairval, 1 + br i1 %cond, label %exit, label %call + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + Index: test/CodeGen/SystemZ/cmpxchg-04.ll =================================================================== --- test/CodeGen/SystemZ/cmpxchg-04.ll +++ test/CodeGen/SystemZ/cmpxchg-04.ll @@ -120,3 +120,41 @@ ret i32 %res } +declare void @g() + +; Check using the comparison result for a branch. +; CHECK-LABEL: f11 +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK-NEXT: jge g +; CHECK: br %r14 +define void @f11(i64 %cmp, i64 %swap, i64 *%src) { + %pairval = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst + %cond = extractvalue { i64, i1 } %pairval, 1 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +; CHECK-LABEL: f12 +; CHECK: csg %r2, %r3, 0(%r4) +; CHECK-NEXT: jgl g +; CHECK: br %r14 +define void @f12(i64 %cmp, i64 %swap, i64 *%src) { + %pairval = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst + %cond = extractvalue { i64, i1 } %pairval, 1 + br i1 %cond, label %exit, label %call + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + Index: test/CodeGen/SystemZ/cmpxchg-06.ll =================================================================== --- test/CodeGen/SystemZ/cmpxchg-06.ll +++ test/CodeGen/SystemZ/cmpxchg-06.ll @@ -129,3 +129,54 @@ %res = zext i1 %val to i32 ret i32 %res } + +declare void @g() + +; Check using the comparison result for a branch. 
+; CHECK-LABEL: f11 +; CHECK-DAG: lg %r1, 8(%r3) +; CHECK-DAG: lg %r0, 0(%r3) +; CHECK-DAG: lg %r13, 8(%r2) +; CHECK-DAG: lg %r12, 0(%r2) +; CHECK: cdsg %r12, %r0, 0(%r4) +; CHECK-NEXT: jl [[LABEL:\.[^ ]*]] +; CHECK: jg g +; CHECK: [[LABEL]]: +; CHECK: br %r14 +define void @f11(i128 %cmp, i128 %swap, i128 *%src) { + %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst + %cond = extractvalue { i128, i1 } %pairval, 1 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +; CHECK-LABEL: f12 +; CHECK-DAG: lg %r1, 8(%r3) +; CHECK-DAG: lg %r0, 0(%r3) +; CHECK-DAG: lg %r13, 8(%r2) +; CHECK-DAG: lg %r12, 0(%r2) +; CHECK: cdsg %r12, %r0, 0(%r4) +; CHECK-NEXT: jl [[LABEL:\.[^ ]*]] +; CHECK: br %r14 +; CHECK: [[LABEL]]: +; CHECK: jg g +define void @f12(i128 %cmp, i128 %swap, i128 *%src) { + %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst + %cond = extractvalue { i128, i1 } %pairval, 1 + br i1 %cond, label %exit, label %call + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + Index: test/CodeGen/SystemZ/dag-combine-02.ll =================================================================== --- test/CodeGen/SystemZ/dag-combine-02.ll +++ test/CodeGen/SystemZ/dag-combine-02.ll @@ -94,8 +94,8 @@ ;