Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -256,6 +256,14 @@
     /// Same for multiplication.
     SMULO, UMULO,
 
+    /// SSAT(X, W) - Perform saturation on a signed value X to fit in W bits. If
+    /// X is greater than the largest signed value that can be represented in W
+    /// bits, the returned value is this largest signed value. If X is less than
+    /// the smallest signed value that can be represented in W bits, this smallest
+    /// signed value is returned instead. W must be a ValueType node that
+    /// represents an int of W bits.
+    SSAT,
+
     /// Simple binary floating point operators.
     FADD, FSUB, FMUL, FDIV, FREM,
 
Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -833,6 +833,12 @@
     return getNode(ISD::ADD, SL, VT, Op, Offset, Flags);
   }
 
+  /// Build the generic expansion of an ISD::SSAT node for targets that do not
+  /// support it natively. The value operand is returned unchanged when it
+  /// already fits in the saturation width; otherwise it is clamped to the
+  /// signed range of that width using compares and selects.
+  SDValue getExpandedSignedSaturation(SDNode *Node);
+
   /// Return a new CALLSEQ_START node, that starts new call frame, in which
   /// InSize bytes are set up inside CALLSEQ_START..CALLSEQ_END sequence and
   /// OutSize specifies part of the frame set up prior to the sequence.
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -797,6 +797,35 @@
     return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
   }
 
+  /// Custom method defined by each target to indicate if an operation which
+  /// may require a saturation bit width is supported natively by the target.
+  /// If not, getSaturationOperationAction() reports the operation as Expand.
+  virtual bool isSupportedSaturationOperation(unsigned Op, EVT VT,
+                                              unsigned SatBitWidth) const {
+    return false;
+  }
+
+  /// Some saturation operations may be natively supported by the target but
+  /// only for specific saturation widths. This method checks whether the given
+  /// width is supported by the target; if not, the operation is expanded.
+  LegalizeAction getSaturationOperationAction(unsigned Op, EVT VT,
+                                              unsigned SatBitWidth) const {
+    auto Action = getOperationAction(Op, VT);
+    if (Action != Legal)
+      return Action;
+
+    // This operation is supported in this type but may only work on specific
+    // saturation widths.
+    bool Supported;
+    switch (Op) {
+    default:
+      llvm_unreachable("Unexpected saturation operation");
+    case ISD::SSAT:
+      Supported = isSupportedSaturationOperation(Op, VT, SatBitWidth);
+    }
+    return Supported ? Action : Expand;
+  }
+
   LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
     unsigned EqOpc;
     switch (Op) {
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -700,6 +700,12 @@
                          [LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem, IntrSpeculatable]>;
 
+//===------------------------- Fixed Point Intrinsics ---------------------===//
+//
+def int_ssaturate : Intrinsic<[llvm_anyint_ty],
+                              [LLVMMatchType<0>, llvm_i32_ty],
+                              [IntrNoMem, IntrSpeculatable]>;
+
 //===------------------------- Memory Use Markers -------------------------===//
 //
 def int_lifetime_start  : Intrinsic<[],
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1115,6 +1115,15 @@
     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
                                             Node->getValueType(0));
     break;
+  case ISD::SSAT: {
+    // The saturation width is encoded as the bit width of the VT operand.
+    SDValue SatBits = Node->getOperand(1);
+    auto *SatBitsNode = cast<VTSDNode>(SatBits);
+    unsigned NumSatBits = SatBitsNode->getVT().getScalarSizeInBits();
+    Action = TLI.getSaturationOperationAction(
+        Node->getOpcode(), Node->getValueType(0), NumSatBits);
+    break;
+  }
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -3452,6 +3461,9 @@
     }
     break;
   }
+  case ISD::SSAT:
+    Results.push_back(DAG.getExpandedSignedSaturation(Node));
+    break;
   case ISD::SADDO:
   case ISD::SSUBO: {
     SDValue LHS = Node->getOperand(0);
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -141,6 +141,8 @@
   case ISD::ADDCARRY:
   case ISD::SUBCARRY:    Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
 
+  case ISD::SSAT:        Res = PromoteIntRes_SSAT(N); break;
+
   case ISD::ATOMIC_LOAD:
     Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
 
@@ -534,6 +536,12 @@
   return SDValue(Res.getNode(), 1);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_SSAT(SDNode *N) {
+  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = N->getOperand(1);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
   if (ResNo == 1)
     return PromoteIntRes_Overflow(N);
@@ -1454,6 +1462,8 @@
   case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
   case ISD::UMULO:
   case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+
+  case ISD::SSAT: ExpandIntRes_SSAT(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2416,6 +2426,13 @@
   ReplaceValueWith(SDValue(N, 1), R.getValue(2));
 }
 
+void DAGTypeLegalizer::ExpandIntRes_SSAT(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  // Build the generic expansion in the wide type, then split the result.
+  SDValue Result = DAG.getExpandedSignedSaturation(N);
+  SplitInteger(Result, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
                                              SDValue &Lo, SDValue &Hi) {
   SDValue LHS = Node->getOperand(0);
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -330,6 +330,7 @@
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_SSAT(SDNode *N);
 
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -414,6 +415,7 @@
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_XMULO             (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_SSAT              (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandIntRes_ATOMIC_LOAD       (SDNode *N, SDValue &Lo, SDValue &Hi);
 
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8933,3 +8933,34 @@
 void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
   checkForCycles(DAG->getRoot().getNode(), DAG, force);
 }
+
+/// Expand an ISD::SSAT node into compares and selects that clamp operand 0 to
+/// the signed range of the saturation width carried by the VT operand.
+SDValue SelectionDAG::getExpandedSignedSaturation(SDNode *Node) {
+  assert(Node->getOpcode() == ISD::SSAT);
+  assert(Node->getNumOperands() == 2);
+
+  SDLoc dl(Node);
+  SDValue SatBits = Node->getOperand(1);
+  auto *SatBitsNode = cast<VTSDNode>(SatBits);
+
+  SDValue Op1 = Node->getOperand(0);
+  unsigned NumSatBits = SatBitsNode->getVT().getScalarSizeInBits();
+  unsigned SrcBits = Op1.getScalarValueSizeInBits();
+  // Every value of the source type already fits; there is nothing to clamp.
+  if (SrcBits <= NumSatBits)
+    return Op1;
+
+  EVT Ty = Op1.getValueType();
+  auto MinVal = APInt::getSignedMinValue(NumSatBits).sext(SrcBits);
+  auto MaxVal = APInt::getSignedMaxValue(NumSatBits).sext(SrcBits);
+  auto ConstMinVal = getConstant(MinVal, dl, Ty);
+  auto ConstMaxVal = getConstant(MaxVal, dl, Ty);
+
+  // The setcc result type is queried with the type of the compared operands.
+  EVT BoolVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), Ty);
+  SDValue UseMin = getSetCC(dl, BoolVT, Op1, ConstMinVal, ISD::SETLT);
+  SDValue UseMax = getSetCC(dl, BoolVT, Op1, ConstMaxVal, ISD::SETGT);
+  SDValue Result = getSelect(dl, Ty, UseMax, ConstMaxVal, Op1);
+  return getSelect(dl, Ty, UseMin, ConstMinVal, Result);
+}
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5759,6 +5759,17 @@
     setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
     return nullptr;
   }
+  case Intrinsic::ssaturate: {
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    const auto *SatBitsNode = dyn_cast<ConstantInt>(I.getArgOperand(1));
+    assert(SatBitsNode &&
+           "Second argument of ssaturate intrinsic must be a constant integer");
+
+    EVT Op2Type = EVT::getIntegerVT(*Context, SatBitsNode->getZExtValue());
+    setValue(&I, DAG.getNode(ISD::SSAT, sdl, Op1.getValueType(), Op1,
+                             DAG.getValueType(Op2Type)));
+    return nullptr;
+  }
   case Intrinsic::stacksave: {
     SDValue Op = getRoot();
     Res = DAG.getNode(
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -282,6 +282,8 @@
   case ISD::SRA_PARTS:                  return "sra_parts";
   case ISD::SRL_PARTS:                  return "srl_parts";
 
+  case ISD::SSAT:                       return "ssaturate";
+
   // Conversion operators.
   case ISD::SIGN_EXTEND:                return "sign_extend";
   case ISD::ZERO_EXTEND:                return "zero_extend";
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -608,6 +608,7 @@
     setOperationAction(ISD::UMIN, VT, Expand);
     setOperationAction(ISD::UMAX, VT, Expand);
     setOperationAction(ISD::ABS, VT, Expand);
+    setOperationAction(ISD::SSAT, VT, Expand);
 
     // Overflow operations default to expand
     setOperationAction(ISD::SADDO, VT, Expand);
Index: test/CodeGen/Generic/ssaturate.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Generic/ssaturate.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s
+
+declare i4  @llvm.ssaturate.i4   (i4, i32)
+declare i32 @llvm.ssaturate.i32  (i32, i32)
+
+define i32 @func() {
+entry:
+  %x = alloca i32, align 4
+
+  store i32 16, i32* %x, align 4
+  %val = load i32, i32* %x
+
+  %tmp = call i32 @llvm.ssaturate.i32( i32 %val, i32 4 )
+  ret i32 %tmp
+}
+
+define i4 @func2() {
+entry:
+  %x = alloca i4, align 1
+
+  store i4 1, i4* %x, align 1
+  %val = load i4, i4* %x
+
+  %tmp = call i4 @llvm.ssaturate.i4( i4 %val, i32 8 )
+  ret i4 %tmp
+}
Index: test/CodeGen/X86/ssaturate.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ssaturate.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s --check-prefix=CHECK32
+
+declare i4  @llvm.ssaturate.i4   (i4, i32)
+declare i32 @llvm.ssaturate.i32  (i32, i32)
+declare i64 @llvm.ssaturate.i64  (i64, i32)
+
+; i32 is a legal type, so this is expanded during operation legalization
+define i32 @func(i32 %x) {
+entry:
+; CHECK:      func:
+; CHECK:      cmpl $127, %edi
+; CHECK-NEXT: movl $127, %ecx
+; CHECK-NEXT: cmovlel %edi, %ecx
+; CHECK-NEXT: cmpl $-128, %edi
+; CHECK-NEXT: movl $-128, %eax
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: retq
+
+  %tmp = call i32 @llvm.ssaturate.i32( i32 %x, i32 8 )
+  ret i32 %tmp
+}
+
+; Value always fits into saturation bits
+define i4 @func2(i4 %x) {
+entry:
+; CHECK:      func2:
+; CHECK:      # %bb.0: # %entry
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shlb $4, %al
+; CHECK-NEXT: sarb $4, %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+
+  %tmp = call i4 @llvm.ssaturate.i4( i4 %x, i32 8 )
+  ret i4 %tmp
+}
+
+; Type too large and is split after expansion
+define i64 @func3(i64 %x) {
+entry:
+; CHECK32: func3
+; CHECK32: # %bb.0: # %entry
+; CHECK32: pushl %edi
+; CHECK32: .cfi_def_cfa_offset 8
+; CHECK32: pushl %esi
+; CHECK32: .cfi_def_cfa_offset 12
+; CHECK32: .cfi_offset %esi, -12
+; CHECK32: .cfi_offset %edi, -8
+; CHECK32: movl 12(%esp), %esi
+; CHECK32: movl 16(%esp), %eax
+; CHECK32: xorl %ecx, %ecx
+; CHECK32: movl $127, %edx
+; CHECK32: cmpl %esi, %edx
+; CHECK32: movl $0, %edi
+; CHECK32: sbbl %eax, %edi
+; CHECK32: jl .LBB2_2
+; CHECK32: # %bb.1: # %entry
+; CHECK32: movl %eax, %ecx
+; CHECK32: movl %esi, %edx
+; CHECK32: .LBB2_2: # %entry
+; CHECK32: cmpl $-128, %esi
+; CHECK32: sbbl $-1, %eax
+; CHECK32: movl $-128, %eax
+; CHECK32: jl .LBB2_4
+; CHECK32: # %bb.3: # %entry
+; CHECK32: movl %edx, %eax
+; CHECK32: .LBB2_4: # %entry
+; CHECK32: movl $-1, %edx
+; CHECK32: jl .LBB2_6
+; CHECK32: # %bb.5: # %entry
+; CHECK32: movl %ecx, %edx
+; CHECK32: .LBB2_6: # %entry
+; CHECK32: popl %esi
+; CHECK32: popl %edi
+; CHECK32: retl
+  %tmp = call i64 @llvm.ssaturate.i64( i64 %x, i32 8 )
+  ret i64 %tmp
+}