diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -14239,6 +14239,101 @@
       %res = call i4 @llvm.usub.sat.i4(i4 2, i4 6)  ; %res = 0
 
 
+'``llvm.sshl.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sshl.sat``
+on integers or vectors of integers of any bit width.
+
+::
+
+      declare i16 @llvm.sshl.sat.i16(i16 %a, i16 %b)
+      declare i32 @llvm.sshl.sat.i32(i32 %a, i32 %b)
+      declare i64 @llvm.sshl.sat.i64(i64 %a, i64 %b)
+      declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview
+"""""""""
+
+The '``llvm.sshl.sat``' family of intrinsic functions perform signed
+saturating left shift on the first argument.
+
+Arguments
+""""""""""
+
+The arguments (``%a`` and ``%b``) and the result may be of integer types of any
+bit width, but they must have the same bit width. ``%a`` is the value to be
+shifted, and ``%b`` is the amount to shift by. ``%b`` must be less than the bit
+width.
+
+Semantics:
+""""""""""
+
+The maximum value this operation can clamp to is the largest signed value
+representable by the bit width of the arguments. The minimum value is the
+smallest signed value representable by this bit width.
+
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call i4 @llvm.sshl.sat.i4(i4 2, i4 1)  ; %res = 4
+      %res = call i4 @llvm.sshl.sat.i4(i4 2, i4 2)  ; %res = 7
+      %res = call i4 @llvm.sshl.sat.i4(i4 -5, i4 1)  ; %res = -8
+      %res = call i4 @llvm.sshl.sat.i4(i4 -1, i4 1)  ; %res = -2
+
+
+'``llvm.ushl.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ushl.sat``
+on integers or vectors of integers of any bit width.
+
+::
+
+      declare i16 @llvm.ushl.sat.i16(i16 %a, i16 %b)
+      declare i32 @llvm.ushl.sat.i32(i32 %a, i32 %b)
+      declare i64 @llvm.ushl.sat.i64(i64 %a, i64 %b)
+      declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview
+"""""""""
+
+The '``llvm.ushl.sat``' family of intrinsic functions perform unsigned
+saturating left shift on the first argument.
+
+Arguments
+""""""""""
+
+The arguments (``%a`` and ``%b``) and the result may be of integer types of any
+bit width, but they must have the same bit width. ``%a`` is the value to be
+shifted, and ``%b`` is the amount to shift by. ``%b`` must be less than the bit
+width.
+
+Semantics:
+""""""""""
+
+The maximum value this operation can clamp to is the largest unsigned value
+representable by the bit width of the arguments.
+
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call i4 @llvm.ushl.sat.i4(i4 2, i4 1)  ; %res = 4
+      %res = call i4 @llvm.ushl.sat.i4(i4 3, i4 3)  ; %res = 15
+
+
 Fixed Point Arithmetic Intrinsics
 ---------------------------------
 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -310,6 +310,15 @@
   SSUBSAT,
   USUBSAT,
 
+  /// RESULT = [US]SHLSAT(LHS, RHS) - Perform saturating left shift on 2
+  /// integers with the same bit width (W). If the true value of LHS << RHS
+  /// exceeds the largest value that can be represented by W bits, the
+  /// resulting value is this maximum value. Otherwise, if this value is less
+  /// than the smallest value that can be represented by W bits, the
+  /// resulting value is this minimum value.
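+  /// For example, with W = 4: SSHLSAT(2, 2) yields 7 because the true value
+  /// 2 << 2 = 8 exceeds the largest signed 4-bit value, while USHLSAT(2, 2)
+  /// yields 8 since the largest unsigned 4-bit value is 15.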
+ SSHLSAT, + USHLSAT, + /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication /// on /// 2 integers with the same width and scale. SCALE represents the scale of diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4397,6 +4397,10 @@ /// method accepts integers as its arguments. SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[US]SHLSAT. This + /// method accepts integers as its arguments. + SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This /// method accepts integers as its arguments. SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -995,6 +995,12 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_sshl_sat : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_ushl_sat : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===// // diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -396,6 +396,8 @@ def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>; def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; +def sshlsat : SDNode<"ISD::SSHLSAT" , SDTIntBinOp>; +def ushlsat : SDNode<"ISD::USHLSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1118,7 +1118,9 @@ case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: - case ISD::USUBSAT: { + case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; } @@ -3467,6 +3469,10 @@ case ISD::USUBSAT: Results.push_back(TLI.expandAddSubSat(Node, DAG)); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: + Results.push_back(TLI.expandShlSat(Node, DAG)); + break; case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -156,6 +156,9 @@ case ISD::SSUBSAT: case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: Res = PromoteIntRes_SHLSAT(N); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -781,6 +784,46 @@ } } +SDValue DAGTypeLegalizer::PromoteIntRes_SHLSAT(SDNode *N) { + // 
Expand it to + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US]SHLSAT + // 4. SRA/SRL by M-N + // We cannot expand this to a min/max check because a shift may shift out all + // bits, and then we cannot detect overflow. + SDLoc dl(N); + SDValue Op1 = N->getOperand(0); + SDValue Op2 = N->getOperand(1); + unsigned OldBits = Op1.getScalarValueSizeInBits(); + + unsigned Opcode = N->getOpcode(); + + SDValue Op1Promoted, Op2Promoted; + unsigned ShiftOp; + if (Opcode == ISD::USHLSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + ShiftOp = ISD::SRL; + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); + ShiftOp = ISD::SRA; + } + EVT PromotedType = Op1Promoted.getValueType(); + unsigned NewBits = PromotedType.getScalarSizeInBits(); + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); +} + SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { // Can just promote the operands then continue with operation. SDLoc dl(N); @@ -2025,6 +2068,9 @@ case ISD::SSUBSAT: case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break; + case ISD::SSHLSAT: + case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -3147,6 +3193,12 @@ SplitInteger(Result, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Result = TLI.expandShlSat(N, DAG); + SplitInteger(Result, Lo, Hi); +} + /// This performs an expansion of the integer result for a fixed point /// multiplication. 
The default expansion performs rounding down towards /// negative infinity, though targets that do care about rounding should specify diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -342,6 +342,7 @@ SDValue PromoteIntRes_VSCALE(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); + SDValue PromoteIntRes_SHLSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); @@ -442,6 +443,7 @@ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -454,6 +454,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::SMULFIX: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -129,6 +129,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: case ISD::FPOW: case ISD::FREM: @@ -939,6 +941,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -2758,6 +2762,8 @@ case ISD::SADDSAT: case ISD::USUBSAT: case ISD::SSUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: Res = WidenVecRes_Binary(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6354,6 +6354,18 @@ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } + case Intrinsic::sshl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::ushl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } case Intrinsic::smul_fix: case Intrinsic::umul_fix: case Intrinsic::smul_fix_sat: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,8 @@ case ISD::UADDSAT: return "uaddsat"; case ISD::SSUBSAT: return "ssubsat"; case ISD::USUBSAT: return "usubsat"; + case ISD::SSHLSAT: return "sshlsat"; + case ISD::USHLSAT: 
return "ushlsat"; case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7343,6 +7343,59 @@ } } +SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + bool IsSigned = Opcode == ISD::SSHLSAT; + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc dl(Node); + + assert((Node->getOpcode() == ISD::SSHLSAT || + Node->getOpcode() == ISD::USHLSAT) && + "Expected a SHLSAT opcode"); + assert(VT == RHS.getValueType() && "Expected operands to be the same type"); + assert(VT.isInteger() && "Expected operands to be integers"); + + // For signed shifts, we can check for overflow by checking if we would have + // shifted out any bits that disagree with the sign bit. For unsigned shifts, + // we can just check if we would have shifted out any ones. + // TODO: On targets that don't support CTLZ, it may be more efficient to pull + // down the bits to be shifted out and compare those to the signmask/zero + // instead. + + unsigned BW = VT.getScalarSizeInBits(); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); + SDValue Threshold; + + if (IsSigned) { + // The threshold for signed shifting is the number of leading sign bits - 1. + SDValue Mask = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(BW - 1, dl, VT)); + SDValue XORLHS = DAG.getNode(ISD::XOR, dl, VT, LHS, Mask); + SDValue LSignBits = DAG.getNode(ISD::CTLZ, dl, VT, XORLHS); + Threshold = DAG.getNode(ISD::SUB, dl, VT, LSignBits, + DAG.getConstant(1, dl, VT)); + } else { + // The threshold for unsigned shifting is the number of leading zeros. + Threshold = DAG.getNode(ISD::CTLZ, dl, VT, LHS); + } + + SDValue SatVal; + if (IsSigned) { + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); + SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), + SatMin, SatMax, ISD::SETLT); + } else { + SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); + } + // We have overflow if the shift amount is greater than the threshold. 
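+  // For example, an i8 LHS of 22 (0b00010110) has three leading zeros, so
+  // the unsigned threshold is 3 and the signed threshold is 3 - 1 = 2;
+  // shifting left by more than that would drop a set bit (unsigned) or
+  // change the sign (signed), so we saturate instead.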
+ Result = DAG.getSelectCC(dl, RHS, Threshold, SatVal, Result, ISD::SETUGT); + + return Result; +} + SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -657,6 +657,8 @@ setOperationAction(ISD::UADDSAT, VT, Expand); setOperationAction(ISD::SSUBSAT, VT, Expand); setOperationAction(ISD::USUBSAT, VT, Expand); + setOperationAction(ISD::SSHLSAT, VT, Expand); + setOperationAction(ISD::USHLSAT, VT, Expand); setOperationAction(ISD::SMULFIX, VT, Expand); setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4930,15 +4930,17 @@ case Intrinsic::sadd_sat: case Intrinsic::uadd_sat: case Intrinsic::ssub_sat: - case Intrinsic::usub_sat: { + case Intrinsic::usub_sat: + case Intrinsic::sshl_sat: + case Intrinsic::ushl_sat: { Value *Op1 = Call.getArgOperand(0); Value *Op2 = Call.getArgOperand(1); Assert(Op1->getType()->isIntOrIntVectorTy(), - "first operand of [us][add|sub]_sat must be an int type or vector " - "of ints"); + "first operand of [us][add|sub|shl]_sat must be an int type or " + "vector of ints"); Assert(Op2->getType()->isIntOrIntVectorTy(), - "second operand of [us][add|sub]_sat must be an int type or vector " - "of ints"); + "second operand of [us][add|sub|shl]_sat must be an int type or " + "vector of ints"); break; } case Intrinsic::smul_fix: diff --git a/llvm/test/CodeGen/X86/sshl_sat.ll b/llvm/test/CodeGen/X86/sshl_sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/sshl_sat.ll @@ -0,0 +1,577 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 + +declare i4 @llvm.sshl.sat.i4 (i4, i4) +declare i15 @llvm.sshl.sat.i15 (i15, i15) +declare i16 @llvm.sshl.sat.i16 (i16, i16) +declare i18 @llvm.sshl.sat.i18 (i18, i18) +declare i64 @llvm.sshl.sat.i64 (i64, i64) +declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>) + +define i16 @func(i16 %x, i16 %y) nounwind { +; X64-LABEL: func: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movswl %di, %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: sarl $15, %eax +; X64-NEXT: xorl %edi, %eax +; X64-NEXT: bsrw %ax, %ax +; X64-NEXT: movw $31, %si +; X64-NEXT: cmovnew %ax, %si +; X64-NEXT: xorl $15, %esi +; X64-NEXT: decl %esi +; X64-NEXT: shll %cl, %edi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testw %dx, %dx +; X64-NEXT: sets %al +; X64-NEXT: addl $32767, %eax # imm = 0x7FFF +; X64-NEXT: cmpw %si, %cx +; X64-NEXT: cmovbel %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movswl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: sarl $15, %eax +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: bsrw %ax, %ax +; X86-NEXT: movw $31, %si +; X86-NEXT: cmovnew %ax, %si +; X86-NEXT: xorl $15, %esi +; X86-NEXT: decl %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %dx, %dx +; X86-NEXT: sets 
%al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %si, %cx +; X86-NEXT: cmovbel %edi, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 %y) + ret i16 %tmp +} + +define i16 @func2(i8 %x, i8 %y) nounwind { +; X64-LABEL: func2: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: movswl %ax, %edx +; X64-NEXT: movsbl %cl, %esi +; X64-NEXT: addl %eax, %eax +; X64-NEXT: shrl $15, %edx +; X64-NEXT: xorl %eax, %edx +; X64-NEXT: bsrw %dx, %dx +; X64-NEXT: movw $31, %di +; X64-NEXT: cmovnew %dx, %di +; X64-NEXT: xorl $15, %edi +; X64-NEXT: decl %edi +; X64-NEXT: movl %eax, %edx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %di, %si +; X64-NEXT: cmovbel %edx, %ecx +; X64-NEXT: movswl %cx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func2: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: shrl $15, %eax +; X86-NEXT: addl %edx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: bsrw %ax, %ax +; X86-NEXT: movw $31, %si +; X86-NEXT: cmovnew %ax, %si +; X86-NEXT: xorl $15, %esi +; X86-NEXT: decl %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %dx, %dx +; X86-NEXT: sets %al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %si, %cx +; X86-NEXT: cmovbel %edi, %eax +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %x2 = sext i8 %x to i15 + %y2 = sext i8 %y to i15 + %tmp = call i15 @llvm.sshl.sat.i15(i15 %x2, i15 %y2) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i16 @func3(i15 %x, i8 %y) nounwind { +; X64-LABEL: func3: +; X64: # %bb.0: +; X64-NEXT: shll $8, %esi +; X64-NEXT: movswl %si, %ecx +; X64-NEXT: sarl %ecx +; X64-NEXT: addl %edi, %edi +; X64-NEXT: movswl %di, %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: sarl $15, %edx +; X64-NEXT: xorl %edi, %edx +; X64-NEXT: bsrw %dx, %dx +; X64-NEXT: movw $31, %si +; X64-NEXT: cmovnew %dx, %si +; X64-NEXT: xorl $15, %esi +; X64-NEXT: decl %esi +; X64-NEXT: shll %cl, %edi +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %dl +; X64-NEXT: addl $32767, %edx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %cx +; X64-NEXT: cmovbel %edi, %edx +; X64-NEXT: movswl %dx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func3: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll $8, %ecx +; X86-NEXT: movswl %cx, %ecx +; X86-NEXT: sarl %ecx +; X86-NEXT: addl %eax, %eax +; X86-NEXT: movswl %ax, %esi +; X86-NEXT: movl %esi, %edx +; X86-NEXT: sarl $15, %edx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: bsrw %dx, %dx +; X86-NEXT: movw $31, %di +; X86-NEXT: cmovnew %dx, %di +; X86-NEXT: xorl $15, %edi +; X86-NEXT: decl %edi +; X86-NEXT: shll %cl, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testw %si, %si +; X86-NEXT: sets %dl +; X86-NEXT: addl 
$32767, %edx # imm = 0x7FFF +; X86-NEXT: cmpw %di, %cx +; X86-NEXT: cmovbel %eax, %edx +; X86-NEXT: movswl %dx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %y2 = sext i8 %y to i15 + %y3 = shl i15 %y2, 7 + %tmp = call i15 @llvm.sshl.sat.i15(i15 %x, i15 %y3) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i4 @func4(i4 %x, i4 %y) nounwind { +; X64-LABEL: func4: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shlb $4, %cl +; X64-NEXT: sarb $4, %cl +; X64-NEXT: shlb $4, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb $7, %al +; X64-NEXT: xorb %dil, %al +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: bsrl %eax, %eax +; X64-NEXT: movl $15, %edx +; X64-NEXT: cmovnel %eax, %edx +; X64-NEXT: xorl $7, %edx +; X64-NEXT: decb %dl +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shlb %cl, %al +; X64-NEXT: movzbl %al, %esi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %dil, %dil +; X64-NEXT: sets %al +; X64-NEXT: addl $127, %eax +; X64-NEXT: cmpb %dl, %cl +; X64-NEXT: cmovbel %esi, %eax +; X64-NEXT: sarb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: shlb $4, %cl +; X86-NEXT: sarb $4, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: shlb $4, %ch +; X86-NEXT: movb %ch, %al +; X86-NEXT: sarb $7, %al +; X86-NEXT: xorb %ch, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: bsrl %eax, %eax +; X86-NEXT: movl $15, %edx +; X86-NEXT: cmovnel %eax, %edx +; X86-NEXT: xorl $7, %edx +; X86-NEXT: decb %dl +; X86-NEXT: movb %ch, %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %ch, %ch +; X86-NEXT: sets %al +; X86-NEXT: addl $127, %eax +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: cmovbel %esi, %eax +; X86-NEXT: sarb $4, %al +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i4 @llvm.sshl.sat.i4(i4 %x, i4 %y) + ret i4 %tmp +} + +define i64 @func5(i64 %x, i64 %y) nounwind { +; X64-LABEL: func5: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: sarq $63, %rax +; X64-NEXT: xorq %rdi, %rax +; X64-NEXT: bsrq %rax, %rax +; X64-NEXT: movl $127, %edx +; X64-NEXT: cmovneq %rax, %rdx +; X64-NEXT: xorq $63, %rdx +; X64-NEXT: decq %rdx +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: shlq %cl, %r8 +; X64-NEXT: xorl %esi, %esi +; X64-NEXT: testq %rdi, %rdi +; X64-NEXT: sets %sil +; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: addq %rsi, %rax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmovbeq %r8, %rax +; X64-NEXT: retq +; +; X86-LABEL: func5: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: xorl %edi, %esi +; X86-NEXT: bsrl %esi, %esi +; X86-NEXT: movl $63, %ebx +; X86-NEXT: cmovnel %esi, %ebx +; X86-NEXT: xorl $31, %ebx +; X86-NEXT: addl $32, %ebx +; X86-NEXT: xorl %eax, %edi +; X86-NEXT: bsrl %edi, %esi +; X86-NEXT: xorl $31, %esi +; X86-NEXT: testl %edi, %edi +; X86-NEXT: cmovel %ebx, %esi +; X86-NEXT: subl $1, %esi +; X86-NEXT: movl $0, %ebx +; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl %edx, %ebp +; 
X86-NEXT: shll %cl, %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %ebp, %edi +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovnel %ecx, %ebp +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testl %eax, %eax +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovsl %ecx, %eax +; X86-NEXT: sets %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: cmovael %ebp, %eax +; X86-NEXT: cmovael %edi, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl + %tmp = call i64 @llvm.sshl.sat.i64(i64 %x, i64 %y) + ret i64 %tmp +} + +define i18 @func6(i16 %x, i16 %y) nounwind { +; X64-LABEL: func6: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movswl %di, %edx +; X64-NEXT: movswl %cx, %r8d +; X64-NEXT: movl %edx, %eax +; X64-NEXT: sarl $31, %eax +; X64-NEXT: shll $14, %edx +; X64-NEXT: xorl %edx, %eax +; X64-NEXT: bsrl %eax, %eax +; X64-NEXT: movl $63, %edi +; X64-NEXT: cmovnel %eax, %edi +; X64-NEXT: xorl $31, %edi +; X64-NEXT: decl %edi +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %esi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edx, %edx +; X64-NEXT: sets %al +; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %edi, %r8d +; X64-NEXT: cmovbel %esi, %eax +; X64-NEXT: sarl $14, %eax +; X64-NEXT: retq +; +; X86-LABEL: func6: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movswl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: shll $14, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: bsrl %eax, %eax +; X86-NEXT: movl $63, %esi +; X86-NEXT: cmovnel %eax, %esi +; X86-NEXT: xorl $31, %esi +; X86-NEXT: decl %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %al +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %esi, %ecx +; X86-NEXT: cmovbel %edi, %eax +; X86-NEXT: sarl $14, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %x2 = sext i16 %x to i18 + %y2 = sext i16 %y to i18 + %tmp = call i18 @llvm.sshl.sat.i18(i18 %x2, i18 %y2) + ret i18 %tmp +} + +define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { +; X64-LABEL: vec: +; X64: # %bb.0: +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] +; X64-NEXT: movd %xmm2, %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: sarl $31, %eax +; X64-NEXT: xorl %edx, %eax +; X64-NEXT: bsrl %eax, %esi +; X64-NEXT: movl $63, %r8d +; X64-NEXT: cmovel %r8d, %esi +; X64-NEXT: xorl $31, %esi +; X64-NEXT: decl %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; X64-NEXT: movd %xmm2, %ecx +; X64-NEXT: movl %edx, %edi +; X64-NEXT: shll %cl, %edi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %edx, %edx +; X64-NEXT: sets %al +; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %esi, %ecx +; X64-NEXT: cmovbel %edi, %eax +; X64-NEXT: movd %eax, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] +; X64-NEXT: movd %xmm3, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarl $31, %ecx +; X64-NEXT: xorl %eax, %ecx +; X64-NEXT: bsrl %ecx, %edx +; X64-NEXT: cmovel %r8d, %edx +; X64-NEXT: xorl $31, %edx +; X64-NEXT: decl %edx +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; X64-NEXT: movd %xmm3, %ecx +; X64-NEXT: 
movl %eax, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %dil +; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmovbel %esi, %edi +; X64-NEXT: movd %edi, %xmm3 +; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarl $31, %ecx +; X64-NEXT: xorl %eax, %ecx +; X64-NEXT: bsrl %ecx, %edx +; X64-NEXT: cmovel %r8d, %edx +; X64-NEXT: xorl $31, %edx +; X64-NEXT: decl %edx +; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %dil +; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmovbel %esi, %edi +; X64-NEXT: movd %edi, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: sarl $31, %ecx +; X64-NEXT: xorl %eax, %ecx +; X64-NEXT: bsrl %ecx, %edx +; X64-NEXT: cmovel %r8d, %edx +; X64-NEXT: xorl $31, %edx +; X64-NEXT: decl %edx +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-NEXT: movd %xmm0, %ecx +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sets %dil +; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmovbel %esi, %edi +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: vec: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: bsrl %esi, %edi +; X86-NEXT: movl $63, %esi +; X86-NEXT: cmovel %esi, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: decl %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sets %al +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %edi, %ecx +; X86-NEXT: cmovbel %ebx, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: bsrl %ecx, %edi +; X86-NEXT: cmovel %esi, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: decl %edi +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sets %bl +; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmovbel %ebp, %ebx +; X86-NEXT: movl %eax, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: bsrl %edx, %edi +; X86-NEXT: cmovel %esi, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: decl %edi +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sets %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %edi, %ecx +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmovbel %ebp, %edx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl %edi, %eax +; X86-NEXT: bsrl %eax, %ebp +; X86-NEXT: cmovel %esi, %ebp +; X86-NEXT: movl %edi, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: xorl $31, %ebp +; X86-NEXT: decl %ebp +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %edi, %edi +; X86-NEXT: sets %al +; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-NEXT: cmpl %ebp, %ecx +; X86-NEXT: cmovbel %esi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, 12(%ecx) +; X86-NEXT: movl %edx, 8(%ecx) +; X86-NEXT: movl %ebx, 4(%ecx) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%ecx) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $4, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %tmp +} diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/ushl_sat.ll @@ -0,0 +1,405 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 + +declare i4 @llvm.ushl.sat.i4 (i4, i4) +declare i15 @llvm.ushl.sat.i15 (i15, i15) +declare i16 @llvm.ushl.sat.i16 (i16, i16) +declare i18 @llvm.ushl.sat.i18 (i18, i18) +declare i64 @llvm.ushl.sat.i64 (i64, i64) +declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>) + +define i16 @func(i16 %x, i16 %y) nounwind { +; X64-LABEL: func: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: bsrw %di, %ax +; X64-NEXT: movw $31, %dx +; X64-NEXT: cmovnew %ax, %dx +; X64-NEXT: xorl $15, %edx +; X64-NEXT: shll %cl, %edi +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: movl $65535, %eax # imm = 0xFFFF +; X64-NEXT: cmovbel %edi, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: bsrw %dx, %ax +; X86-NEXT: movw $31, %si +; X86-NEXT: cmovnew %ax, %si +; X86-NEXT: xorl $15, %esi +; X86-NEXT: shll %cl, %edx +; X86-NEXT: cmpw %si, %cx +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovbel %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 %y) + ret i16 %tmp +} + +define i16 @func2(i8 %x, i8 %y) nounwind { +; X64-LABEL: func2: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: movsbl %cl, %edx +; X64-NEXT: andl $32767, %edx # imm = 0x7FFF +; X64-NEXT: addl %eax, %eax +; X64-NEXT: bsrw %ax, %si +; X64-NEXT: movw $31, %di +; X64-NEXT: cmovnew %si, %di +; X64-NEXT: xorl $15, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %eax +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: movl $65535, %ecx # imm = 0xFFFF +; X64-NEXT: cmovbel %eax, %ecx +; X64-NEXT: movswl %cx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func2: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $32767, %eax 
# imm = 0x7FFF +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: bsrw %dx, %si +; X86-NEXT: movw $31, %di +; X86-NEXT: cmovnew %si, %di +; X86-NEXT: xorl $15, %edi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: cmpw %di, %ax +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovbel %edx, %eax +; X86-NEXT: cwtl +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %x2 = sext i8 %x to i15 + %y2 = sext i8 %y to i15 + %tmp = call i15 @llvm.ushl.sat.i15(i15 %x2, i15 %y2) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i16 @func3(i15 %x, i8 %y) nounwind { +; X64-LABEL: func3: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: shll $7, %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: andl $32640, %eax # imm = 0x7F80 +; X64-NEXT: addl %edi, %edi +; X64-NEXT: bsrw %di, %dx +; X64-NEXT: movw $31, %si +; X64-NEXT: cmovnew %dx, %si +; X64-NEXT: xorl $15, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edi +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: movl $65535, %eax # imm = 0xFFFF +; X64-NEXT: cmovbel %edi, %eax +; X64-NEXT: cwtl +; X64-NEXT: shrl %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func3: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll $7, %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $32640, %edx # imm = 0x7F80 +; X86-NEXT: addl %eax, %eax +; X86-NEXT: bsrw %ax, %si +; X86-NEXT: movw $31, %di +; X86-NEXT: cmovnew %si, %di +; X86-NEXT: xorl $15, %edi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %eax +; X86-NEXT: cmpw %di, %dx +; X86-NEXT: movl $65535, %ecx # imm = 0xFFFF +; X86-NEXT: cmovbel %eax, %ecx +; X86-NEXT: movswl %cx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %y2 = sext i8 %y to i15 + %y3 = shl i15 %y2, 7 + %tmp = call i15 @llvm.ushl.sat.i15(i15 %x, i15 %y3) + %tmp2 = sext i15 %tmp to i16 + ret i16 %tmp2 +} + +define i4 @func4(i4 %x, i4 %y) nounwind { +; X64-LABEL: func4: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: andb $15, %cl +; X64-NEXT: shlb $4, %dil +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: bsrl %eax, %edx +; X64-NEXT: movl $15, %esi +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: xorl $7, %esi +; X64-NEXT: shlb %cl, %al +; X64-NEXT: movzbl %al, %edx +; X64-NEXT: cmpb %sil, %cl +; X64-NEXT: movl $255, %eax +; X64-NEXT: cmovbel %edx, %eax +; X64-NEXT: shrb $4, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; X86-LABEL: func4: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: andb $15, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: shlb $4, %dl +; X86-NEXT: movzbl %dl, %eax +; X86-NEXT: bsrl %eax, %esi +; X86-NEXT: movl $15, %eax +; X86-NEXT: cmovnel %esi, %eax +; X86-NEXT: xorl $7, %eax +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edx +; X86-NEXT: cmpb %al, %cl +; X86-NEXT: movl $255, %eax +; X86-NEXT: cmovbel %edx, %eax +; X86-NEXT: shrb $4, %al +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: popl %esi +; X86-NEXT: retl + %tmp = call i4 @llvm.ushl.sat.i4(i4 %x, i4 %y) + ret i4 %tmp +} + +define i64 @func5(i64 %x, i64 %y) nounwind { +; 
X64-LABEL: func5: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: bsrq %rdi, %rax +; X64-NEXT: movl $127, %edx +; X64-NEXT: cmovneq %rax, %rdx +; X64-NEXT: xorq $63, %rdx +; X64-NEXT: shlq %cl, %rdi +; X64-NEXT: cmpq %rdx, %rsi +; X64-NEXT: movq $-1, %rax +; X64-NEXT: cmovbeq %rdi, %rax +; X64-NEXT: retq +; +; X86-LABEL: func5: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: bsrl %esi, %eax +; X86-NEXT: movl $63, %edi +; X86-NEXT: cmovnel %eax, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: addl $32, %edi +; X86-NEXT: bsrl %edx, %ebx +; X86-NEXT: xorl $31, %ebx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: cmovel %edi, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %eax, %edx +; X86-NEXT: cmovnel %esi, %eax +; X86-NEXT: cmpl %ecx, %ebx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: cmovbl %ecx, %edx +; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl + %tmp = call i64 @llvm.ushl.sat.i64(i64 %x, i64 %y) + ret i64 %tmp +} + +define i18 @func6(i16 %x, i16 %y) nounwind { +; X64-LABEL: func6: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movswl %di, %edx +; X64-NEXT: movswl %cx, %eax +; X64-NEXT: andl $262143, %eax # imm = 0x3FFFF +; X64-NEXT: shll $14, %edx +; X64-NEXT: bsrl %edx, %esi +; X64-NEXT: movl $63, %edi +; X64-NEXT: cmovnel %esi, %edi +; X64-NEXT: xorl $31, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: movl $-1, %eax +; X64-NEXT: cmovbel %edx, %eax +; X64-NEXT: shrl $14, %eax +; X64-NEXT: retq +; +; X86-LABEL: func6: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $262143, %eax # imm = 0x3FFFF +; X86-NEXT: movswl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shll $14, %edx +; X86-NEXT: bsrl %edx, %esi +; X86-NEXT: movl $63, %edi +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: xorl $31, %edi +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: cmpl %edi, %eax +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovbel %edx, %eax +; X86-NEXT: shrl $14, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl + %x2 = sext i16 %x to i18 + %y2 = sext i16 %y to i18 + %tmp = call i18 @llvm.ushl.sat.i18(i18 %x2, i18 %y2) + ret i18 %tmp +} + +define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { +; X64-LABEL: vec: +; X64: # %bb.0: +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] +; X64-NEXT: movd %xmm2, %esi +; X64-NEXT: bsrl %esi, %edx +; X64-NEXT: movl $63, %eax +; X64-NEXT: cmovel %eax, %edx +; X64-NEXT: xorl $31, %edx +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; X64-NEXT: movd %xmm2, %ecx +; X64-NEXT: shll %cl, %esi +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: movl $-1, %edx +; X64-NEXT: cmoval %edx, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] +; X64-NEXT: movd %xmm3, %esi +; X64-NEXT: bsrl %esi, %edi +; X64-NEXT: cmovel %eax, %edi +; X64-NEXT: xorl $31, %edi +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] +; X64-NEXT: movd %xmm3, %ecx +; X64-NEXT: shll %cl, %esi +; X64-NEXT: cmpl %edi, %ecx +; X64-NEXT: 
cmoval %edx, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: movd %xmm0, %esi +; X64-NEXT: bsrl %esi, %edi +; X64-NEXT: cmovel %eax, %edi +; X64-NEXT: xorl $31, %edi +; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: shll %cl, %esi +; X64-NEXT: cmpl %edi, %ecx +; X64-NEXT: cmoval %edx, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-NEXT: movd %xmm0, %esi +; X64-NEXT: bsrl %esi, %edi +; X64-NEXT: cmovel %eax, %edi +; X64-NEXT: xorl $31, %edi +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-NEXT: movd %xmm0, %ecx +; X64-NEXT: shll %cl, %esi +; X64-NEXT: cmpl %edi, %ecx +; X64-NEXT: cmoval %edx, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: retq +; +; X86-LABEL: vec: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: bsrl %edx, %esi +; X86-NEXT: movl $63, %eax +; X86-NEXT: cmovel %eax, %esi +; X86-NEXT: movl $63, %eax +; X86-NEXT: xorl $31, %esi +; X86-NEXT: shll %cl, %edx +; X86-NEXT: cmpl %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $-1, %edi +; X86-NEXT: cmoval %edi, %edx +; X86-NEXT: bsrl %esi, %ebx +; X86-NEXT: cmovel %eax, %ebx +; X86-NEXT: xorl $31, %ebx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: cmpl %ebx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: cmoval %edi, %esi +; X86-NEXT: bsrl %ebx, %ebp +; X86-NEXT: cmovel %eax, %ebp +; X86-NEXT: xorl $31, %ebp +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: cmpl %ebp, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: cmoval %edi, %ebx +; X86-NEXT: bsrl %ebp, %eax +; X86-NEXT: movl $63, %ecx +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll %cl, %ebp +; X86-NEXT: xorl $31, %eax +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: cmoval %edi, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %ebp, 12(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %tmp +}