diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -245,10 +245,10 @@ %2:_(s32) = G_ADD %0:_(s32), %1:_(s32) -G_SADDSAT, G_UADDSAT, G_SSUBSAT, G_USUBSAT -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +G_SADDSAT, G_UADDSAT, G_SSUBSAT, G_USUBSAT, G_SSHLSAT, G_USHLSAT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Signed and unsigned addition and subtraction with saturation. +Signed and unsigned addition, subtraction and left shift with saturation. .. code-block:: none diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14547,6 +14547,108 @@ %res = call i4 @llvm.usub.sat.i4(i4 2, i4 6) ; %res = 0 +'``llvm.sshl.sat.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.sshl.sat`` +on integers or vectors of integers of any bit width. + +:: + + declare i16 @llvm.sshl.sat.i16(i16 %a, i16 %b) + declare i32 @llvm.sshl.sat.i32(i32 %a, i32 %b) + declare i64 @llvm.sshl.sat.i64(i64 %a, i64 %b) + declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %a, <4 x i32> %b) + +Overview: +""""""""" + +The '``llvm.sshl.sat``' family of intrinsic functions performs signed +saturating left shift on the first argument. + +Arguments: +"""""""""" + +The arguments (``%a`` and ``%b``) and the result may be of integer types of any +bit width, but they must have the same bit width. ``%a`` is the value to be +shifted, and ``%b`` is the amount to shift by. If ``%b`` is (statically or +dynamically) equal to or larger than the integer bit width of the arguments, +the result is a :ref:`poison value <poisonvalues>`. If the arguments are +vectors, each vector element of ``%a`` is shifted by the corresponding shift +amount in ``%b``. + + +Semantics: +"""""""""" + +The maximum value this operation can clamp to is the largest signed value +representable by the bit width of the arguments. The minimum value is the +smallest signed value representable by this bit width. + + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call i4 @llvm.sshl.sat.i4(i4 2, i4 1) ; %res = 4 + %res = call i4 @llvm.sshl.sat.i4(i4 2, i4 2) ; %res = 7 + %res = call i4 @llvm.sshl.sat.i4(i4 -5, i4 1) ; %res = -8 + %res = call i4 @llvm.sshl.sat.i4(i4 -1, i4 1) ; %res = -2 + + +'``llvm.ushl.sat.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.ushl.sat`` +on integers or vectors of integers of any bit width. + +:: + + declare i16 @llvm.ushl.sat.i16(i16 %a, i16 %b) + declare i32 @llvm.ushl.sat.i32(i32 %a, i32 %b) + declare i64 @llvm.ushl.sat.i64(i64 %a, i64 %b) + declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %a, <4 x i32> %b) + +Overview: +""""""""" + +The '``llvm.ushl.sat``' family of intrinsic functions performs unsigned +saturating left shift on the first argument. + +Arguments: +"""""""""" + +The arguments (``%a`` and ``%b``) and the result may be of integer types of any +bit width, but they must have the same bit width. ``%a`` is the value to be +shifted, and ``%b`` is the amount to shift by. If ``%b`` is (statically or +dynamically) equal to or larger than the integer bit width of the arguments, +the result is a :ref:`poison value <poisonvalues>`. If the arguments are +vectors, each vector element of ``%a`` is shifted by the corresponding shift +amount in ``%b``.
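+ +For example, with vector arguments each element saturates independently. A sketch (these operand values are illustrative, chosen to mirror the scalar examples below): + +.. code-block:: llvm + + ; first element: 2 << 1 = 4, which fits in an i4; second element: 3 << 3 = 24, which clamps to the unsigned i4 maximum of 15 + %res = call <2 x i4> @llvm.ushl.sat.v2i4(<2 x i4> <i4 2, i4 3>, <2 x i4> <i4 1, i4 3>)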
+ +Semantics: +"""""""""" + +The maximum value this operation can clamp to is the largest unsigned value +representable by the bit width of the arguments. + + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call i4 @llvm.ushl.sat.i4(i4 2, i4 1) ; %res = 4 + %res = call i4 @llvm.ushl.sat.i4(i4 3, i4 3) ; %res = 15 + + Fixed Point Arithmetic Intrinsics --------------------------------- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -167,8 +167,8 @@ widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); - LegalizeResult widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy); + LegalizeResult + widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). The generic @@ -344,6 +344,7 @@ LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI); LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI); + LegalizeResult lowerShlSat(MachineInstr &MI); LegalizeResult lowerBswap(MachineInstr &MI); LegalizeResult lowerBitreverse(MachineInstr &MI); LegalizeResult lowerReadWriteRegister(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -310,6 +310,16 @@ SSUBSAT, USUBSAT, + /// RESULT = [US]SHLSAT(LHS, RHS) - Perform saturating left shift. The first + /// operand is the value to be shifted, and the second operand is the amount + /// to shift by. Both must be integers of the same bit width (W). If the true + /// value of LHS << RHS exceeds the largest value that can be represented by + /// W bits, the resulting value is this maximum value. Otherwise, if this + /// value is less than the smallest value that can be represented by W bits, + /// the resulting value is this minimum value. + SSHLSAT, + USHLSAT, + /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication /// on /// 2 integers with the same width and scale. SCALE represents the scale of diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4391,6 +4391,10 @@ /// method accepts integers as its arguments. SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[US]SHLSAT. This + /// method accepts integers as its arguments. + SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This /// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -999,6 +999,12 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_sshl_sat : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_ushl_sat : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===// // diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -475,6 +475,12 @@ /// Generic saturating signed subtraction. HANDLE_TARGET_OPCODE(G_SSUBSAT) +/// Generic saturating unsigned left shift. +HANDLE_TARGET_OPCODE(G_USHLSAT) + +/// Generic saturating signed left shift. +HANDLE_TARGET_OPCODE(G_SSHLSAT) + // Perform signed fixed point multiplication HANDLE_TARGET_OPCODE(G_SMULFIX) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -545,6 +545,22 @@ let isCommutable = 0; } +// Generic saturating unsigned left shift. +def G_USHLSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = 0; + let isCommutable = 0; +} + +// Generic saturating signed left shift. +def G_SSHLSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = 0; + let isCommutable = 0; +} + /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point /// multiplication on 2 integers with the same width and scale. SCALE /// represents the scale of both operands as fixed point numbers. 
This diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -71,6 +71,8 @@ def : GINodeEquiv<G_UADDSAT, uaddsat>; def : GINodeEquiv<G_SSUBSAT, ssubsat>; def : GINodeEquiv<G_USUBSAT, usubsat>; +def : GINodeEquiv<G_SSHLSAT, sshlsat>; +def : GINodeEquiv<G_USHLSAT, ushlsat>; def : GINodeEquiv<G_SMULFIX, smulfix>; def : GINodeEquiv<G_UMULFIX, umulfix>; def : GINodeEquiv<G_SMULFIXSAT, smulfixsat>; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -396,6 +396,8 @@ def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>; def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; +def sshlsat : SDNode<"ISD::SSHLSAT" , SDTIntBinOp>; +def ushlsat : SDNode<"ISD::USHLSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1501,6 +1501,10 @@ return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder); case Intrinsic::ssub_sat: return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder); + case Intrinsic::ushl_sat: + return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder); + case Intrinsic::sshl_sat: + return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder); case Intrinsic::umin: return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder); case Intrinsic::umax: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1715,14 +1715,17 @@ } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { +LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || - MI.getOpcode() == TargetOpcode::G_SSUBSAT; + MI.getOpcode() == TargetOpcode::G_SSUBSAT || + MI.getOpcode() == TargetOpcode::G_SSHLSAT; + bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT; // We can convert this to: // 1. Any extend iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // // It may be more efficient to lower this to a min and a max operation in @@ -1733,11 +1736,14 @@ unsigned NewBits = WideTy.getScalarSizeInBits(); unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); + // Shifts must zero-extend the RHS, since it is an unsigned amount, and must + // not left shift the RHS, since it is a shift amount rather than a value. auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); - auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2)) + : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); - auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK); + auto ShiftR = IsShift ? 
RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK); auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {ShiftL, ShiftR}, MI.getFlags()); @@ -1790,9 +1796,11 @@ } case TargetOpcode::G_SADDSAT: case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_SSHLSAT: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: - return widenScalarAddSubSat(MI, TypeIdx, WideTy); + case TargetOpcode::G_USHLSAT: + return widenScalarAddSubShlSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -2936,6 +2944,9 @@ return lowerAddSubSatToMinMax(MI); return lowerAddSubSatToAddoSubo(MI); } + case G_SSHLSAT: + case G_USHLSAT: + return lowerShlSat(MI); } } @@ -3760,6 +3771,8 @@ case G_SHL: case G_LSHR: case G_ASHR: + case G_SSHLSAT: + case G_USHLSAT: case G_CTLZ: case G_CTLZ_ZERO_UNDEF: case G_CTTZ: @@ -5770,6 +5783,40 @@ } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShlSat(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT || + MI.getOpcode() == TargetOpcode::G_USHLSAT) && + "Expected shlsat opcode!"); + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT; + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Res); + LLT BoolTy = Ty.changeElementSize(1); + + unsigned BW = Ty.getScalarSizeInBits(); + auto Result = MIRBuilder.buildShl(Ty, LHS, RHS); + auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS) + : MIRBuilder.buildLShr(Ty, Result, RHS); + + MachineInstrBuilder SatVal; + if (IsSigned) { + auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW)); + auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW)); + auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, + MIRBuilder.buildConstant(Ty, 0)); + SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax); + } else { + SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW)); + } + auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, Ty, LHS, Orig); + MIRBuilder.buildSelect(Res, Ov, SatVal, Result); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -977,7 +977,9 @@ } case TargetOpcode::G_SHL: case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: { + case TargetOpcode::G_LSHR: + case TargetOpcode::G_USHLSAT: + case TargetOpcode::G_SSHLSAT: { assert(DstOps.size() == 1 && "Invalid Dst"); assert(SrcOps.size() == 2 && "Invalid Srcs"); validateShiftOp(DstOps[0].getLLTTy(*getMRI()), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1118,7 +1118,9 @@ case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: - case ISD::USUBSAT: { + case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; } @@ -3476,6 +3478,10 @@ case ISD::USUBSAT: Results.push_back(TLI.expandAddSubSat(Node, DAG)); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: + Results.push_back(TLI.expandShlSat(Node, 
DAG)); + break; case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -154,7 +154,9 @@ case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: - case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break; case ISD::SMULFIX: case ISD::SMULFIXSAT: @@ -700,11 +702,11 @@ return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT); } -SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { // If the promoted type is legal, we can convert this to: // 1. ANY_EXTEND iN to iM // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT + // 3. [US][ADD|SUB|SHL]SAT // 4. L/ASHR by M-N // Else it is more efficient to convert this to a min and a max // operation in the higher precision arithmetic. @@ -714,9 +716,13 @@ unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); + bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT; SDValue Op1Promoted, Op2Promoted; - if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + if (IsShift) { + Op1Promoted = GetPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { Op1Promoted = ZExtPromotedInteger(Op1); Op2Promoted = ZExtPromotedInteger(Op2); } else { @@ -726,20 +732,24 @@ EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + // Shifts cannot use a min/max expansion; we can't detect overflow if all of + // the bits have been shifted out. + if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { unsigned ShiftOp; switch (Opcode) { case ISD::SADDSAT: case ISD::SSUBSAT: + case ISD::SSHLSAT: ShiftOp = ISD::SRA; break; case ISD::UADDSAT: case ISD::USUBSAT: + case ISD::USHLSAT: ShiftOp = ISD::SRL; break; default: llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); + "addition, subtraction or left shift"); } unsigned SHLAmount = NewBits - OldBits; @@ -747,8 +757,9 @@ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + if (!IsShift) + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); SDValue Result = DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); @@ -2025,6 +2036,9 @@ case ISD::SSUBSAT: case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -3147,6 +3161,12 @@ SplitInteger(Result, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Result = TLI.expandShlSat(N, DAG); + SplitInteger(Result, Lo, Hi); +} + /// This performs an expansion of the integer result for a fixed point /// multiplication. 
The default expansion performs rounding down towards /// negative infinity, though targets that do care about rounding should specify diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -341,7 +341,7 @@ SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_VSCALE(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); + SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); @@ -442,6 +442,7 @@ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -454,6 +454,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::SMULFIX: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -129,6 +129,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: case ISD::FPOW: case ISD::FREM: @@ -942,6 +944,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -2792,6 +2796,8 @@ case ISD::SADDSAT: case ISD::USUBSAT: case ISD::SSUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: Res = WidenVecRes_Binary(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6332,6 +6332,18 @@ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } + case Intrinsic::sshl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::ushl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } case Intrinsic::smul_fix: case Intrinsic::umul_fix: case Intrinsic::smul_fix_sat: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,8 @@ case ISD::UADDSAT: return "uaddsat"; case ISD::SSUBSAT: return "ssubsat"; case ISD::USUBSAT: return "usubsat"; + case 
ISD::SSHLSAT: return "sshlsat"; + case ISD::USHLSAT: return "ushlsat"; case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7385,6 +7385,41 @@ } } +SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + bool IsSigned = Opcode == ISD::SSHLSAT; + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc dl(Node); + + assert((Node->getOpcode() == ISD::SSHLSAT || + Node->getOpcode() == ISD::USHLSAT) && + "Expected a SHLSAT opcode"); + assert(VT == RHS.getValueType() && "Expected operands to be the same type"); + assert(VT.isInteger() && "Expected operands to be integers"); + + // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. + + unsigned BW = VT.getScalarSizeInBits(); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); + SDValue Orig = + DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS); + + SDValue SatVal; + if (IsSigned) { + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); + SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), + SatMin, SatMax, ISD::SETLT); + } else { + SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); + } + Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE); + + return Result; +} + SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -657,6 +657,8 @@ setOperationAction(ISD::UADDSAT, VT, Expand); setOperationAction(ISD::SSUBSAT, VT, Expand); setOperationAction(ISD::USUBSAT, VT, Expand); + setOperationAction(ISD::SSHLSAT, VT, Expand); + setOperationAction(ISD::USHLSAT, VT, Expand); setOperationAction(ISD::SMULFIX, VT, Expand); setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4986,15 +4986,17 @@ case Intrinsic::sadd_sat: case Intrinsic::uadd_sat: case Intrinsic::ssub_sat: - case Intrinsic::usub_sat: { + case Intrinsic::usub_sat: + case Intrinsic::sshl_sat: + case Intrinsic::ushl_sat: { Value *Op1 = Call.getArgOperand(0); Value *Op2 = Call.getArgOperand(1); Assert(Op1->getType()->isIntOrIntVectorTy(), - "first operand of [us][add|sub]_sat must be an int type or vector " - "of ints"); + "first operand of [us][add|sub|shl]_sat must be an int type or " + "vector of ints"); Assert(Op2->getType()->isIntOrIntVectorTy(), - "second operand of [us][add|sub]_sat must be an int type or vector " - "of ints"); + "second operand of [us][add|sub|shl]_sat must be an int type or " + "vector of ints"); break; } case Intrinsic::smul_fix: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1284,6 +1284,11 @@ Shifts.clampScalar(1, S32, S32); Shifts.clampScalar(0, 
S16, S64); Shifts.widenScalarToNextPow2(0, 16); + + getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT}) + .minScalar(0, S16) + .scalarize(0) + .lower(); } else { // Make sure we legalize the shift amount type first, as the general // expansion for the shifted type will produce much worse code if it hasn't @@ -1291,6 +1296,11 @@ Shifts.clampScalar(1, S32, S32); Shifts.clampScalar(0, S32, S64); Shifts.widenScalarToNextPow2(0, 32); + + getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT}) + .minScalar(0, S32) + .scalarize(0) + .lower(); } Shifts.scalarize(0); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -344,6 +344,12 @@ # DEBUG-NEXT: G_SSUBSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_USHLSAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_SSHLSAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_SMULFIX (opcode {{[0-9]+}}): 1 type index, 1 imm index # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -316,3 +316,161 @@ ret <2 x i32> %res } declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) + +define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) { + ; CHECK-LABEL: name: ushlsat_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs) + ret i16 %res +} +declare i16 @llvm.ushl.sat.i16(i16, i16) + +define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) { + ; CHECK-LABEL: name: ushlsat_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[USHLSAT]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs) + ret i32 
%res +} +declare i32 @llvm.ushl.sat.i32(i32, i32) + +define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { + ; CHECK-LABEL: name: ushlsat_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs) + ret i64 %res +} +declare i64 @llvm.ushl.sat.i64(i64, i64) + +define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { + ; CHECK-LABEL: name: ushlsat_v2i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + ret <2 x i32> %res +} +declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>) + +define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) { + ; CHECK-LABEL: name: sshlsat_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs) + ret i16 %res +} +declare i16 @llvm.sshl.sat.i16(i16, i16) + +define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) { + ; CHECK-LABEL: name: sshlsat_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: 
[[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SSHLSAT]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs) + ret i32 %res +} +declare i32 @llvm.sshl.sat.i32(i32, i32) + +define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { + ; CHECK-LABEL: name: sshlsat_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs) + ret i64 %res +} +declare i64 @llvm.sshl.sat.i64(i64, i64) + +define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { + ; CHECK-LABEL: name: sshlsat_v2i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) + ret <2 x i32> %res +} +declare <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -0,0 +1,952 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: sshlsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: sshlsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]] + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8-LABEL: name: sshlsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16) + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[ZEXT]](s16) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL1]] + ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR1]](s16) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT1]](s32) + ; GFX9-LABEL: name: sshlsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16) + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[ZEXT]](s16) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL1]] + ; GFX9: 
[[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SSHLSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: sshlsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: sshlsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]] + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8-LABEL: name: sshlsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16) + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[ZEXT]](s16) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL1]] + ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR1]](s16) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT1]](s32) + ; GFX9-LABEL: name: sshlsat_s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16) + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[ZEXT]](s16) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL1]] + ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_SSHLSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: sshlsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: sshlsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C6]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[C5]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]] + ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C2]](s32) + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32) + ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C4]], [[C5]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL3]] + ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C2]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s16) = 
G_CONSTANT i16 255 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY6]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: sshlsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[SSHLSAT:%[0-9]+]]:_(s8) = G_SSHLSAT [[UV]], [[UV2]](s8) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16) + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[ZEXT]](s16) + ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL1]] + ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR1]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSHLSAT]](s8), [[TRUNC2]](s8) + ; GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT1]](s32) + ; GFX9-LABEL: name: sshlsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[SSHLSAT:%[0-9]+]]:_(s8) = G_SSHLSAT [[UV]], [[UV2]](s8) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16) + ; GFX9: 
[[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C3]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL1]]
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR1]](s16)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSHLSAT]](s8), [[TRUNC2]](s8)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>)
+    ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(<2 x s8>) = G_BITCAST %2
+    %5:_(<2 x s8>) = G_BITCAST %3
+    %6:_(<2 x s8>) = G_SSHLSAT %4, %5
+    %7:_(s16) = G_BITCAST %6
+    %8:_(s32) = G_ANYEXT %7
+    $vgpr0 = COPY %8
+...
+
+---
+name: sshlsat_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: sshlsat_s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]]
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: sshlsat_s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
+    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: sshlsat_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(s16) = G_SSHLSAT %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: sshlsat_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: sshlsat_v2s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]]
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+    ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL3]]
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32)
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+    ; GFX8-LABEL: name: sshlsat_v2s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; GFX8: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV]], [[UV2]](s16)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s16)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[UV3]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s16), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV1]](s16), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSHLSAT]](s16), [[SELECT1]](s16)
+    ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: sshlsat_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; GFX9: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV]], [[UV2]](s16)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[UV3]](s16)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s16), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV1]](s16), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSHLSAT]](s16), [[SELECT1]](s16)
+    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_SSHLSAT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: sshlsat_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; GFX6-LABEL: name: sshlsat_v3s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX6: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV1]](<3 x s16>), 0
+    ; GFX6: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]]
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL3]]
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32)
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32)
+    ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32)
+    ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[SELECT4]], [[SHL5]]
+    ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32)
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]]
+    ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32)
+    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]]
+    ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX8-LABEL: name: sshlsat_v3s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>)
+    ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>)
+    ; GFX8: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV2]], [[UV5]](s16)
+    ; GFX8: [[SSHLSAT1:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV3]], [[UV6]](s16)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV4]], [[UV7]](s16)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[UV7]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV4]](s16), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV4]](s16), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSHLSAT]](s16), [[SSHLSAT1]](s16), [[SELECT1]](s16)
+    ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX8: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[UV8]](<3 x s16>)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-LABEL: name: sshlsat_v3s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>)
+    ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>)
+    ; GFX9: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV2]], [[UV5]](s16)
+    ; GFX9: [[SSHLSAT1:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV3]], [[UV6]](s16)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV4]], [[UV7]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[UV7]](s16)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV4]](s16), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV4]](s16), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSHLSAT]](s16), [[SSHLSAT1]](s16), [[SELECT1]](s16)
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[UV8]](<3 x s16>)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+    %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
+    %3:_(<3 x s16>) = G_SSHLSAT %1, %2
+    %4:_(<3 x s16>) = G_IMPLICIT_DEF
+    %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4
+    $vgpr0_vgpr1_vgpr2 = COPY %5
+...
+
+---
+name: sshlsat_v4s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX6-LABEL: name: sshlsat_v4s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL1]]
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+    ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL3]]
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32)
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32)
+    ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32)
+    ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[SELECT4]], [[SHL5]]
+    ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32)
+    ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32)
+    ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32)
+    ; GFX6: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s32), [[SELECT6]], [[SHL7]]
+    ; GFX6: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32)
+    ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+    ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+    ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
+    ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32)
+    ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ASHR7]](s32)
+    ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+    ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]]
+    ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    ; GFX8-LABEL: name: sshlsat_v4s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX8: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV]], [[UV4]](s16)
+    ; GFX8: [[SSHLSAT1:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV1]], [[UV5]](s16)
+    ; GFX8: [[SSHLSAT2:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV2]], [[UV6]](s16)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV3]], [[UV7]](s16)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[UV7]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s16), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV3]](s16), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSHLSAT]](s16), [[SSHLSAT1]](s16), [[SSHLSAT2]](s16), [[SELECT1]](s16)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; GFX9-LABEL: name: sshlsat_v4s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX9: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV]], [[UV4]](s16)
+    ; GFX9: [[SSHLSAT1:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV1]], [[UV5]](s16)
+    ; GFX9: [[SSHLSAT2:%[0-9]+]]:_(s16) = G_SSHLSAT [[UV2]], [[UV6]](s16)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV3]], [[UV7]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[UV7]](s16)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s16), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV3]](s16), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s16), [[SELECT]], [[SHL]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSHLSAT]](s16), [[SSHLSAT1]](s16), [[SSHLSAT2]](s16), [[SELECT1]](s16)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x s16>) = G_SSHLSAT %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: sshlsat_s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: sshlsat_s32
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL]]
+    ; GFX6: $vgpr0 = COPY [[SELECT1]](s32)
+    ; GFX8-LABEL: name: sshlsat_s32
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL]]
+    ; GFX8: $vgpr0 = COPY [[SELECT1]](s32)
+    ; GFX9-LABEL: name: sshlsat_s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL]]
+    ; GFX9: $vgpr0 = COPY [[SELECT1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_SSHLSAT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: sshlsat_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX6-LABEL: name: sshlsat_v2s32
+    ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL]]
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL1]]
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX8-LABEL: name: sshlsat_v2s32
+    ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL]]
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL1]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: sshlsat_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SHL]]
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[SELECT2]], [[SHL1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = G_SSHLSAT %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: sshlsat_s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX6-LABEL: name: sshlsat_s64
+    ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s64)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[SELECT]], [[SHL]]
+    ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
+    ; GFX8-LABEL: name: sshlsat_s64
+    ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s64)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[SELECT]], [[SHL]]
+    ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
+    ; GFX9-LABEL: name: sshlsat_s64
+    ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s64)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY1]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[SELECT]], [[SHL]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s64) = G_SSHLSAT %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: sshlsat_v2s64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
+
+    ; GFX6-LABEL: name: sshlsat_v2s64
+    ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX6: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[UV]], [[UV2]](s64)
+    ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s64)
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[UV3]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[SELECT]], [[SHL]]
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSHLSAT]](s64), [[SELECT1]](s64)
+    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX8-LABEL: name: sshlsat_v2s64
+    ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX8: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[UV]], [[UV2]](s64)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s64)
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[UV3]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[SELECT]], [[SHL]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSHLSAT]](s64), [[SELECT1]](s64)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-LABEL: name: sshlsat_v2s64
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
+    ; GFX9: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[UV]], [[UV2]](s64)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s64)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[UV3]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s64), [[SELECT]], [[SHL]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSHLSAT]](s64), [[SELECT1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+    %2:_(<2 x s64>) = G_SSHLSAT %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
@@ -0,0 +1,800 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+
+---
+name: ushlsat_s7
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: ushlsat_s7
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SHL1]]
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: ushlsat_s7
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7)
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s7)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16)
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C1]], [[SHL1]]
+    ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C]](s16)
+    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR1]](s16)
+    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
+    ; GFX8: $vgpr0 = COPY [[ANYEXT1]](s32)
+    ; GFX9-LABEL: name: ushlsat_s7
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s7)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C1]], [[SHL1]]
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR1]](s16)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s7) = G_TRUNC %0
+    %3:_(s7) = G_TRUNC %1
+    %4:_(s7) = G_USHLSAT %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: ushlsat_s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: ushlsat_s8
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SHL1]]
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: ushlsat_s8
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8)
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s8)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16)
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C1]], [[SHL1]]
+    ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C]](s16)
+    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s16)
+    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8)
+    ; GFX8: $vgpr0 = COPY [[ANYEXT1]](s32)
+    ; GFX9-LABEL: name: ushlsat_s8
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC1]](s8)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C1]], [[SHL1]]
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s16)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s8) = G_TRUNC %0
+    %3:_(s8) = G_TRUNC %1
+    %4:_(s8) = G_USHLSAT %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: ushlsat_v2s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: ushlsat_v2s8
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+    ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
+    ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C2]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR6]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C4]], [[SHL1]]
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C2]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
+    ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
+    ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR8]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C4]], [[SHL3]]
+    ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C2]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]]
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+    ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY6]](s32)
+    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]]
+    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8-LABEL: name: ushlsat_v2s8
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16)
+    ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16)
+    ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>)
+    ; GFX8: [[USHLSAT:%[0-9]+]]:_(s8) = G_USHLSAT [[UV]], [[UV2]](s8)
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV3]](s8)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16)
+    ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C1]], [[SHL1]]
+    ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C]](s16)
+    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s16)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USHLSAT]](s8), [[TRUNC2]](s8)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>)
+    ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8)
+    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16)
+    ; GFX8: $vgpr0 = COPY [[ANYEXT1]](s32)
+    ; GFX9-LABEL: name: ushlsat_v2s8
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16)
+    ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>)
+    ; GFX9: [[USHLSAT:%[0-9]+]]:_(s8) = G_USHLSAT [[UV]], [[UV2]](s8)
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV3]](s8)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[ZEXT]](s16)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[ZEXT]](s16)
+    ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C1]], [[SHL1]]
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s16)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USHLSAT]](s8), [[TRUNC2]](s8)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>)
+    ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8)
+    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(<2 x s8>) = G_BITCAST %2
+    %5:_(<2 x s8>) = G_BITCAST %3
+    %6:_(<2 x s8>) = G_USHLSAT %4, %5
+    %7:_(s16) = G_BITCAST %6
+    %8:_(s32) = G_ANYEXT %7
+    $vgpr0 = COPY %8
+...
+
+---
+name: ushlsat_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: ushlsat_s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SHL1]]
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX8-LABEL: name: ushlsat_s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
+    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: ushlsat_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(s16) = G_USHLSAT %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: ushlsat_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: ushlsat_v2s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SHL1]]
+    ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+    ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
+    ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SHL3]]
+    ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32)
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+    ; GFX8-LABEL: name: ushlsat_v2s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; GFX8: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[UV]], [[UV2]](s16)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s16)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[UV3]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV1]](s16), [[LSHR]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USHLSAT]](s16), [[SELECT]](s16)
+    ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: ushlsat_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; GFX9: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[UV]], [[UV2]](s16)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV1]], [[UV3]](s16)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[UV3]](s16)
+    ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV1]](s16), [[LSHR]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USHLSAT]](s16), [[SELECT]](s16)
+    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_USHLSAT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: ushlsat_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; GFX6-LABEL: name: ushlsat_v3s16
+    ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX6: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
+    ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV1]](<3 x s16>), 0
+    ; GFX6: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+    ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+    ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32)
+    ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32)
+    ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SHL1]]
+    ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32)
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
+    ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SHL3]]
+    ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32)
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
+    ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32)
+    ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32)
+    ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[C2]], [[SHL5]]
+    ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32)
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
+    ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+    ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]]
+    ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32)
+    ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32)
+    ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]]
+    ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+    ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
+    ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>)
+    ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX8-LABEL: name: ushlsat_v3s16
+    ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>)
+    ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>)
+    ; GFX8: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[UV2]], [[UV5]](s16)
+    ; GFX8: [[USHLSAT1:%[0-9]+]]:_(s16) = G_USHLSAT [[UV3]], [[UV6]](s16)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV4]], [[UV7]](s16)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[UV7]](s16)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV4]](s16), [[LSHR]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USHLSAT]](s16), [[USHLSAT1]](s16), [[SELECT]](s16)
+    ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX8: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
+    ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[UV8]](<3 x s16>)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6
x s16>) + ; GFX9-LABEL: name: ushlsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[UV2]], [[UV5]](s16) + ; GFX9: [[USHLSAT1:%[0-9]+]]:_(s16) = G_USHLSAT [[UV3]], [[UV6]](s16) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV4]], [[UV7]](s16) + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[UV7]](s16) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV4]](s16), [[LSHR]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USHLSAT]](s16), [[USHLSAT1]](s16), [[SELECT]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[UV8]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_USHLSAT %1, %2 + %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... 
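+# In the ushlsat_v3s16 test above, the odd-sized vector is padded to <4 x s16>
+# with G_IMPLICIT_DEF/G_INSERT before the per-element expansion and narrowed
+# back with G_EXTRACT afterwards, so the fourth lane is scratch data that never
+# reaches the result.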
+ +--- +name: ushlsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ushlsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SHL1]] + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SHL3]] + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[C2]], [[SHL5]] + ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) + ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) + ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s32), [[C2]], [[SHL7]] + ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR 
[[SELECT3]], [[C]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-LABEL: name: ushlsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[UV]], [[UV4]](s16) + ; GFX8: [[USHLSAT1:%[0-9]+]]:_(s16) = G_USHLSAT [[UV1]], [[UV5]](s16) + ; GFX8: [[USHLSAT2:%[0-9]+]]:_(s16) = G_USHLSAT [[UV2]], [[UV6]](s16) + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV3]], [[UV7]](s16) + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[UV7]](s16) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV3]](s16), [[LSHR]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USHLSAT]](s16), [[USHLSAT1]](s16), [[USHLSAT2]](s16), [[SELECT]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: ushlsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[UV]], [[UV4]](s16) + ; GFX9: [[USHLSAT1:%[0-9]+]]:_(s16) = G_USHLSAT [[UV1]], [[UV5]](s16) + ; GFX9: [[USHLSAT2:%[0-9]+]]:_(s16) = G_USHLSAT [[UV2]], [[UV6]](s16) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[UV3]], [[UV7]](s16) + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[UV7]](s16) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9: [[ICMP:%[0-9]+]]:_(s16) = G_ICMP intpred(ne), [[UV3]](s16), [[LSHR]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s16), [[C]], [[SHL]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USHLSAT]](s16), [[USHLSAT1]](s16), [[USHLSAT2]](s16), [[SELECT]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_USHLSAT %0, %1 + 
$vgpr0_vgpr1 = COPY %2 +... + +--- +name: ushlsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ushlsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[SHL]] + ; GFX6: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-LABEL: name: ushlsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[SHL]] + ; GFX8: $vgpr0 = COPY [[SELECT]](s32) + ; GFX9-LABEL: name: ushlsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[SHL]] + ; GFX9: $vgpr0 = COPY [[SELECT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_USHLSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: ushlsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ushlsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[SHL]] + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C]], [[SHL1]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: ushlsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV]](s32), 
[[LSHR]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[SHL]] + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C]], [[SHL1]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: ushlsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[SHL]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C]], [[SHL1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_USHLSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
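+# For 32-bit elements nothing needs widening beyond splitting the vector, so
+# all three subtargets lower G_USHLSAT the same way: G_SHL, G_LSHR back by the
+# same amount, and G_SELECT of all-ones (-1) when the round trip changed the
+# value.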
+ +--- +name: ushlsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ushlsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s64) + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[COPY1]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[C]], [[SHL]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-LABEL: name: ushlsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s64) + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[COPY1]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[C]], [[SHL]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-LABEL: name: ushlsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[COPY1]](s64) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[COPY1]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[C]], [[SHL]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_USHLSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
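+# The s64 lowering is identical across GFX6/8/9 and needs no widening; the
+# saturation value for the unsigned shift is simply all ones.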
+ +--- +name: ushlsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: ushlsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[UV]], [[UV2]](s64) + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s64) + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[UV3]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[C]], [[SHL]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USHLSAT]](s64), [[SELECT]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: ushlsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[UV]], [[UV2]](s64) + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s64) + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[UV3]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[C]], [[SHL]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USHLSAT]](s64), [[SELECT]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: ushlsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[UV]], [[UV2]](s64) + ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV3]](s64) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[UV3]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s64), [[C]], [[SHL]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USHLSAT]](s64), [[SELECT]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_USHLSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... 
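+# <2 x s64> is scalarized with G_UNMERGE_VALUES, each element lowered as in the
+# s64 test above, and the result rebuilt with G_BUILD_VECTOR.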
diff --git a/llvm/test/CodeGen/X86/sshl_sat.ll b/llvm/test/CodeGen/X86/sshl_sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/sshl_sat.ll @@ -0,0 +1,400 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 + +declare i4 @llvm.sshl.sat.i4 (i4, i4) +declare i8 @llvm.sshl.sat.i8 (i8, i8) +declare i15 @llvm.sshl.sat.i15 (i15, i15) +declare i16 @llvm.sshl.sat.i16 (i16, i16) +declare i18 @llvm.sshl.sat.i18 (i18, i18) +declare i32 @llvm.sshl.sat.i32 (i32, i32) +declare i64 @llvm.sshl.sat.i64 (i64, i64) + +define i16 @func(i16 %x, i16 %y) nounwind { +; X64-LABEL: func: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testw %di, %di +; X64-NEXT: sets %al +; X64-NEXT: addl $32767, %eax # imm = 0x7FFF +; X64-NEXT: cmpw %si, %di +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movswl %si, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %dx, %dx +; X86-NEXT: sets %al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %di, %dx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 %y) + ret i16 %tmp +} + +define i16 @func2(i8 %x, i8 %y) nounwind { +; X64-LABEL: func2: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: addl %eax, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %dl +; X64-NEXT: addl $32767, %edx # imm = 0x7FFF +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movswl %si, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %edi +; X64-NEXT: cmpw %di, %ax +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movswl %si, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func2: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movswl %si, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %dx, %dx +; X86-NEXT: sets %al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %di, %dx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %x2 = sext i8 %x to i15 + %y2 = sext i8 %y to i15 + %tmp = call i15 @llvm.sshl.sat.i15(i15 %x2, i15 %y2) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i16 @func3(i15 %x, i8 %y) nounwind { +; X64-LABEL: func3: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shll $7, %ecx +; X64-NEXT: addl %edi, %edi +; X64-NEXT: movl %edi, %eax +; 
X64-NEXT: shll %cl, %eax +; X64-NEXT: movswl %ax, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %edx +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %di, %di +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %dx, %di +; X64-NEXT: cmovel %eax, %ecx +; X64-NEXT: movswl %cx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func3: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll $7, %ecx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movswl %si, %edi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %dx, %dx +; X86-NEXT: sets %al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %di, %dx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %y2 = sext i8 %y to i15 + %y3 = shl i15 %y2, 7 + %tmp = call i15 @llvm.sshl.sat.i15(i15 %x, i15 %y3) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i4 @func4(i4 %x, i4 %y) nounwind { +; X64-LABEL: func4: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: shlb $4, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, %al +; X64-NEXT: movzbl %al, %esi +; X64-NEXT: movl %esi, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarb %cl, %dl +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %dil, %dil +; X64-NEXT: sets %al +; X64-NEXT: addl $127, %eax +; X64-NEXT: cmpb %dl, %dil +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: sarb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: andb $15, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: shlb $4, %dl +; X86-NEXT: movb %dl, %ch +; X86-NEXT: shlb %cl, %ch +; X86-NEXT: movzbl %ch, %esi +; X86-NEXT: sarb %cl, %ch +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %dl, %dl +; X86-NEXT: sets %al +; X86-NEXT: addl $127, %eax +; X86-NEXT: cmpb %ch, %dl +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: sarb $4, %al +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i4 @llvm.sshl.sat.i4(i4 %x, i4 %y) + ret i4 %tmp +} + +define i64 @func5(i64 %x, i64 %y) nounwind { +; X64-LABEL: func5: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: testq %rdi, %rdi +; X64-NEXT: sets %dl +; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: addq %rdx, %rax +; X64-NEXT: movq %rdi, %rdx +; X64-NEXT: shlq %cl, %rdx +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: sarq %cl, %rsi +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmoveq %rdx, %rax +; X64-NEXT: retq +; +; X86-LABEL: func5: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: shldl %cl, %eax, %ebx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: 
testb $32, %cl +; X86-NEXT: cmovnel %ebp, %ebx +; X86-NEXT: cmovnel %edx, %ebp +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: sarl %cl, %edx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovel %edx, %edi +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: shrdl %cl, %ebx, %esi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: xorl %eax, %esi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: movl $-1, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovsl %ecx, %eax +; X86-NEXT: sets %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: orl %edi, %esi +; X86-NEXT: cmovel %ebp, %eax +; X86-NEXT: cmovel %ebx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl + %tmp = call i64 @llvm.sshl.sat.i64(i64 %x, i64 %y) + ret i64 %tmp +} + +define i18 @func6(i16 %x, i16 %y) nounwind { +; X64-LABEL: func6: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movswl %di, %edx +; X64-NEXT: shll $14, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %edi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edx, %edx +; X64-NEXT: sets %al +; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %edi, %edx +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: sarl $14, %eax +; X64-NEXT: retq +; +; X86-LABEL: func6: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movswl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shll $14, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %al +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %edi, %edx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: sarl $14, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %x2 = sext i16 %x to i18 + %y2 = sext i16 %y to i18 + %tmp = call i18 @llvm.sshl.sat.i18(i18 %x2, i18 %y2) + ret i18 %tmp +} + +define i32 @func7(i32 %x, i32 %y) nounwind { +; X64-LABEL: func7: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edi, %edi +; X64-NEXT: sets %al +; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: retq +; +; X86-LABEL: func7: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %al +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %edi, %edx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %tmp = call i32 @llvm.sshl.sat.i32(i32 %x, i32 %y) + ret i32 %tmp +} + +define i8 @func8(i8 %x, i8 %y) nounwind { +; X64-LABEL: func8: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, 
%al +; X64-NEXT: movzbl %al, %esi +; X64-NEXT: movl %esi, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarb %cl, %dl +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %dil, %dil +; X64-NEXT: sets %al +; X64-NEXT: addl $127, %eax +; X64-NEXT: cmpb %dl, %dil +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func8: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: movb %dl, %ch +; X86-NEXT: shlb %cl, %ch +; X86-NEXT: movzbl %ch, %esi +; X86-NEXT: sarb %cl, %ch +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %dl, %dl +; X86-NEXT: sets %al +; X86-NEXT: addl $127, %eax +; X86-NEXT: cmpb %ch, %dl +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i8 @llvm.sshl.sat.i8(i8 %x, i8 %y) + ret i8 %tmp +} diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 + +declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { +; X64-LABEL: vec: +; X64: # %bb.0: +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X64-NEXT: movd %xmm2, %eax +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] +; X64-NEXT: movd %xmm2, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %cl +; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %esi, %eax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X64-NEXT: movd %xmm3, %eax +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movd %xmm3, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %cl +; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %esi, %eax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %cl +; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %esi, %eax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; X64-NEXT: movd %xmm0, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %eax, %eax 
+; X64-NEXT: sets %cl +; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %esi, %eax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: vec: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: sarl %cl, %ebp +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %bl +; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %ebp, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: cmovel %edi, %ebx +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sarl %cl, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testl %ebp, %ebp +; X86-NEXT: sets %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %eax, %ebp +; X86-NEXT: cmovel %edi, %edx +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: sarl %cl, %ebp +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %esi, %esi +; X86-NEXT: sets %al +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %ebp, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %ebp +; X86-NEXT: sarl %cl, %ebp +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testl %esi, %esi +; X86-NEXT: sets %cl +; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %ebp, %esi +; X86-NEXT: cmovel %edi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, 12(%esi) +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: movl %ebx, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %tmp +} diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/ushl_sat.ll @@ -0,0 +1,342 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 + +declare i4 @llvm.ushl.sat.i4 (i4, i4) +declare i8 @llvm.ushl.sat.i8 (i8, i8) +declare i15 @llvm.ushl.sat.i15 (i15, i15) +declare i16 @llvm.ushl.sat.i16 (i16, i16) +declare i18 @llvm.ushl.sat.i18 (i18, i18) +declare i32 @llvm.ushl.sat.i32 (i32, i32) +declare i64 @llvm.ushl.sat.i64 (i64, i64) + +define i16 @func(i16 %x, i16 %y) nounwind { +; X64-LABEL: func: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: movzwl %ax, %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %eax +; X64-NEXT: cmpw %ax, %di +; X64-NEXT: movl $65535, %eax # imm = 
0xFFFF +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpw %si, %ax +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 %y) + ret i16 %tmp +} + +define i16 @func2(i8 %x, i8 %y) nounwind { +; X64-LABEL: func2: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: addl %eax, %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %esi +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: movl $65535, %eax # imm = 0xFFFF +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: cwtl +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func2: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: addl %eax, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpw %si, %ax +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %x2 = sext i8 %x to i15 + %y2 = sext i8 %y to i15 + %tmp = call i15 @llvm.ushl.sat.i15(i15 %x2, i15 %y2) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i16 @func3(i15 %x, i8 %y) nounwind { +; X64-LABEL: func3: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shll $7, %ecx +; X64-NEXT: addl %edi, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edx +; X64-NEXT: cmpw %dx, %di +; X64-NEXT: movl $65535, %ecx # imm = 0xFFFF +; X64-NEXT: cmovel %eax, %ecx +; X64-NEXT: movswl %cx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func3: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll $7, %ecx +; X86-NEXT: addl %eax, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpw %si, %ax +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %y2 = sext i8 %y to i15 + %y3 = shl i15 %y2, 7 + %tmp = call i15 @llvm.ushl.sat.i15(i15 %x, i15 %y3) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i4 @func4(i4 %x, i4 %y) nounwind { +; X64-LABEL: func4: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: shlb $4, %dil +; X64-NEXT: movl %edi, %eax +; 
X64-NEXT: shlb %cl, %al +; X64-NEXT: movzbl %al, %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrb %cl, %al +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: movl $255, %eax +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: shrb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: andb $15, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb $4, %al +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: movl $255, %eax +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: shrb $4, %al +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i4 @llvm.ushl.sat.i4(i4 %x, i4 %y) + ret i4 %tmp +} + +define i64 @func5(i64 %x, i64 %y) nounwind { +; X64-LABEL: func5: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: movq %rdi, %rdx +; X64-NEXT: shlq %cl, %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rax +; X64-NEXT: cmpq %rax, %rdi +; X64-NEXT: movq $-1, %rax +; X64-NEXT: cmoveq %rdx, %rax +; X64-NEXT: retq +; +; X86-LABEL: func5: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edi, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: shldl %cl, %edi, %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %esi, %edx +; X86-NEXT: cmovnel %ebx, %esi +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: shrl %cl, %ebp +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovel %ebp, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: shrdl %cl, %edx, %eax +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %ebp, %eax +; X86-NEXT: xorl %edi, %eax +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl %eax, %ebx +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovnel %eax, %esi +; X86-NEXT: cmovnel %eax, %edx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl + %tmp = call i64 @llvm.ushl.sat.i64(i64 %x, i64 %y) + ret i64 %tmp +} + +define i18 @func6(i16 %x, i16 %y) nounwind { +; X64-LABEL: func6: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movswl %di, %eax +; X64-NEXT: shll $14, %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %esi +; X64-NEXT: cmpl %esi, %eax +; X64-NEXT: movl $-1, %eax +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: shrl $14, %eax +; X64-NEXT: retq +; +; X86-LABEL: func6: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $14, %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpl %esi, %eax +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: shrl $14, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %x2 = sext i16 %x to i18 + %y2 = sext i16 %y to i18 + %tmp = call i18 @llvm.ushl.sat.i18(i18 %x2, i18 %y2) + ret i18 %tmp +} + +define i32 @func7(i32 %x, i32 %y) nounwind { +; X64-LABEL: func7: +; X64: # 
%bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %eax +; X64-NEXT: cmpl %eax, %edi +; X64-NEXT: movl $-1, %eax +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: retq +; +; X86-LABEL: func7: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpl %esi, %eax +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i32 @llvm.ushl.sat.i32(i32 %x, i32 %y) + ret i32 %tmp +} + +define i8 @func8(i8 %x, i8 %y) nounwind { +; X64-LABEL: func8: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, %al +; X64-NEXT: movzbl %al, %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrb %cl, %al +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: movl $255, %eax +; X64-NEXT: cmovel %edx, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func8: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: movl $255, %eax +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i8 @llvm.ushl.sat.i8(i8 %x, i8 %y) + ret i8 %tmp +} diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 + +declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { +; X64-LABEL: vec: +; X64: # %bb.0: +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X64-NEXT: movd %xmm2, %eax +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] +; X64-NEXT: movd %xmm2, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %esi +; X64-NEXT: cmpl %esi, %eax +; X64-NEXT: movl $-1, %eax +; X64-NEXT: cmovnel %eax, %edx +; X64-NEXT: movd %edx, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X64-NEXT: movd %xmm3, %edx +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movd %xmm3, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpl %edi, %edx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: movd %xmm0, %edx +; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpl %edi, %edx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd 
%esi, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: movd %xmm0, %edx +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; X64-NEXT: movd %xmm0, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpl %edi, %edx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: vec: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: movl %ebp, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovnel %edx, %ebp +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movb %ah, %cl +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpl %esi, %edi +; X86-NEXT: cmovnel %edx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: shrl %cl, %edi +; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmovnel %edx, %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: movl %ebx, 4(%eax) +; X86-NEXT: movl %ebp, (%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %tmp +}
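+; The <4 x i32> expansion is fully scalarized: each lane repeats the scalar
+; pattern from ushl_sat.ll (shll, shrl back by the same amount, cmpl, and
+; cmovnel of the -1 saturation value).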