Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -602,7 +602,7 @@ SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); - unsigned OldBits = Op1.getValueSizeInBits(); + unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); unsigned ShiftOp; @@ -624,7 +624,7 @@ SDValue Op2Promoted = GetPromotedInteger(Op2); EVT PromotedType = Op1Promoted.getValueType(); - unsigned NewBits = Op1Promoted.getValueSizeInBits(); + unsigned NewBits = PromotedType.getScalarSizeInBits(); unsigned SHLAmount = NewBits - OldBits; EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2460,6 +2460,10 @@ case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::UADDSAT: + case ISD::SADDSAT: + case ISD::USUBSAT: + case ISD::SSUBSAT: Res = WidenVecRes_Binary(N); break; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -226,10 +226,6 @@ SCALEF, SCALEFS, - // Integer add/sub with unsigned saturation. - ADDUS, - SUBUS, - // Integer add/sub with signed saturation. ADDS, SUBS, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -829,6 +829,17 @@ setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? 
Legal : Custom); } + setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); + setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); + // Use widening instead of promotion. + for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, + MVT::v4i16, MVT::v2i16 }) { + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + } + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); @@ -1200,6 +1211,11 @@ setOperationAction(ISD::SMIN, MVT::v4i64, Custom); setOperationAction(ISD::UMIN, MVT::v4i64, Custom); + setOperationAction(ISD::UADDSAT, MVT::v32i8, Legal); + setOperationAction(ISD::UADDSAT, MVT::v16i16, Legal); + setOperationAction(ISD::USUBSAT, MVT::v32i8, Legal); + setOperationAction(ISD::USUBSAT, MVT::v16i16, Legal); + for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); @@ -1657,6 +1673,8 @@ setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. 
@@ -19169,7 +19187,7 @@ break; } - SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1); + SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1); return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result, DAG.getConstant(0, dl, VT)); } @@ -26250,11 +26268,12 @@ } return; } + case ISD::UADDSAT: + case ISD::USUBSAT: case X86ISD::VPMADDWD: - case X86ISD::ADDUS: - case X86ISD::SUBUS: case X86ISD::AVG: { - // Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening. + // Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD + // by widening. assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT VT = N->getValueType(0); @@ -26988,8 +27007,6 @@ case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::BLENDI: return "X86ISD::BLENDI"; case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND"; - case X86ISD::ADDUS: return "X86ISD::ADDUS"; - case X86ISD::SUBUS: return "X86ISD::SUBUS"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; @@ -34008,9 +34025,9 @@ SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1); SDValue CondRHS = Cond->getOperand(1); - auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops); + auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); }; // Look for a general sub with unsigned saturation first. @@ -34019,22 +34036,22 @@ if ((CC == ISD::SETUGE || CC == ISD::SETUGT) && Other->getOpcode() == ISD::SUB && OpRHS == CondRHS) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - SUBUSBuilder); + USUBSATBuilder); if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) { if (isa<BuildVectorSDNode>(CondRHS)) { // If the RHS is a constant we have to reverse the const // canonicalization. // x > C-1 ? 
x+-C : 0 --> subus x, C - auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1); }; if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && - ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) { + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) { OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), OpRHS); return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - SUBUSBuilder); + USUBSATBuilder); } // Another special case: If C was a sign bit, the sub has been @@ -34050,7 +34067,7 @@ // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - SUBUSBuilder); + USUBSATBuilder); } } } @@ -34083,9 +34100,9 @@ if (Other.getNode() && Other.getOpcode() == ISD::ADD) { SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); - auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops); + auto UADDSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(ISD::UADDSAT, DL, Ops[0].getValueType(), Ops); }; // Canonicalize condition operands. @@ -34100,20 +34117,20 @@ if (CC == ISD::SETULE && Other == CondRHS && (OpLHS == CondLHS || OpRHS == CondLHS)) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - ADDUSBuilder); + UADDSATBuilder); if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) && CondLHS == OpLHS) { // If the RHS is a constant we have to reverse the const // canonicalization. // x > ~C ? 
x+C : ~0 --> addus x, C - auto MatchADDUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return Cond->getAPIntValue() == ~Op->getAPIntValue(); }; if (CC == ISD::SETULE && - ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchADDUS)) + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT)) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - ADDUSBuilder); + UADDSATBuilder); } } } @@ -40696,16 +40713,16 @@ } else return SDValue(); - auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops); + auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); }; // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { SubusLHS, SubusRHS }, SUBUSBuilder); + { SubusLHS, SubusRHS }, USUBSATBuilder); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -40737,7 +40754,7 @@ SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); SDValue Psubus = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, - { NewSubusLHS, NewSubusRHS }, SUBUSBuilder); + { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. 
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -4834,9 +4834,9 @@ SchedWriteVecALU, HasBWI, 1>; defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SchedWriteVecALU, HasBWI, 0>; -defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, +defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, SchedWriteVecALU, HasBWI, 1>; -defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, +defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat, SchedWriteVecALU, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, SchedWritePMULLD, HasAVX512, 1>, T8PD; Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -227,8 +227,6 @@ SDTCisVec<1>, SDTCisSameAs<2, 1>]>; -def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>; -def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>; def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -3627,9 +3627,9 @@ SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8, +defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16, 
+defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; @@ -3649,9 +3649,9 @@ SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; -defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, +defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; -defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, +defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;