Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -256,6 +256,13 @@
     /// Same for multiplication.
     SMULO, UMULO,
 
+    /// SSAT(X, W) - Saturate the signed value X so that it fits in W bits. If
+    /// X is greater than the largest signed value representable in W bits,
+    /// that largest value is returned; if X is less than the smallest signed
+    /// value representable in W bits, that smallest value is returned.
+    /// Otherwise X is returned unchanged. W must be a constant integer.
+    SSAT,
+
     /// Simple binary floating point operators.
     FADD, FSUB, FMUL, FDIV, FREM,
 
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -797,6 +797,38 @@
     return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
   }
 
+  /// Target hook reporting whether the saturating operation \p Op on type
+  /// \p VT is natively supported when saturating to \p SatBitWidth bits.
+  /// The default implementation supports no width, so an operation marked
+  /// Legal is still expanded unless the target overrides this hook.
+  virtual bool isSupportedSaturationOperation(unsigned Op, EVT VT,
+                                              unsigned SatBitWidth) const {
+    return false;
+  }
+
+  /// A saturating operation may be natively supported by the target, but only
+  /// for specific saturation widths. Return the legalize action for \p Op on
+  /// \p VT, downgrading Legal to Expand when the target does not support
+  /// saturating to \p SatBitWidth bits. Any non-Legal action is returned
+  /// unchanged.
+  LegalizeAction getSaturationOperationAction(unsigned Op, EVT VT,
+                                              unsigned SatBitWidth) const {
+    LegalizeAction Action = getOperationAction(Op, VT);
+    if (Action != Legal)
+      return Action;
+
+    // The operation is legal for this type, but possibly not for this width.
+    bool Supported;
+    switch (Op) {
+    default:
+      llvm_unreachable("Unexpected saturation operation");
+    case ISD::SSAT:
+      Supported = isSupportedSaturationOperation(Op, VT, SatBitWidth);
+      break;
+    }
+    return Supported ? Action : Expand;
+  }
+
   LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
     unsigned EqOpc;
     switch (Op) {
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -700,6 +700,12 @@
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
 
+//===------------------------- Fixed Point Intrinsics ---------------------===//
+//
+def int_ssaturate : Intrinsic<[llvm_anyint_ty],
+                              [LLVMMatchType<0>, llvm_i32_ty],
+                              [IntrNoMem, IntrSpeculatable]>;
+
 //===------------------------- Memory Use Markers -------------------------===//
 //
 def int_lifetime_start  : Intrinsic<[],
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1115,6 +1115,19 @@
     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
                                             Node->getValueType(0));
     break;
+  case ISD::SSAT: {
+    SDValue SatBits = Node->getOperand(1);
+    auto *SatBitsNode = dyn_cast<ConstantSDNode>(SatBits);
+    if (!SatBitsNode)
+      report_fatal_error(
+          "Second argument of ssaturate intrinsic must be a constant integer");
+    unsigned NumSatBits = SatBitsNode->getZExtValue();
+    Action = TLI.getSaturationOperationAction(
+        Node->getOpcode(), Node->getValueType(0), NumSatBits);
+    // Without this break, control falls into the default case below, which
+    // reassigns Action and discards the width-specific result.
+    break;
+  }
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -3461,6 +3474,41 @@
     }
     break;
   }
+  case ISD::SSAT: {
+    // Expand into compare+select nodes that clamp operand 0 to the range of a
+    // signed integer NumSatBits wide.
+    SDValue SatBits = Node->getOperand(1);
+    auto *SatBitsNode = dyn_cast<ConstantSDNode>(SatBits);
+    if (!SatBitsNode)
+      report_fatal_error(
+          "Second argument of ssaturate intrinsic must be a constant integer");
+
+    SDValue Op1 = Node->getOperand(0);
+    unsigned NumSatBits = SatBitsNode->getZExtValue();
+    unsigned SrcBits = Op1.getScalarValueSizeInBits();
+    // Saturating to at least the operand's own width is a no-op.
+    if (SrcBits <= NumSatBits) {
+      Results.push_back(Op1);
+      break;
+    }
+
+    // Saturation bounds, sign-extended to the operand width.
+    EVT Ty = Op1.getValueType();
+    APInt MinVal = APInt::getSignedMinValue(NumSatBits).sext(SrcBits);
+    APInt MaxVal = APInt::getSignedMaxValue(NumSatBits).sext(SrcBits);
+    SDValue ConstMinVal = DAG.getConstant(MinVal, dl, Ty);
+    SDValue ConstMaxVal = DAG.getConstant(MaxVal, dl, Ty);
+
+    // The setcc result type is derived from the type being compared, not i1.
+    EVT BoolVT = getSetCCResultType(Ty);
+    SDValue UseMin = DAG.getSetCC(dl, BoolVT, Op1, ConstMinVal, ISD::SETLT);
+    SDValue UseMax = DAG.getSetCC(dl, BoolVT, Op1, ConstMaxVal, ISD::SETGT);
+    SDValue Result = DAG.getSelect(dl, Ty, UseMax, ConstMaxVal, Op1);
+    Result = DAG.getSelect(dl, Ty, UseMin, ConstMinVal, Result);
+
+    Results.push_back(Result);
+    break;
+  }
   case ISD::SADDO:
   case ISD::SSUBO: {
     SDValue LHS = Node->getOperand(0);
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -141,6 +141,10 @@
   case ISD::ADDCARRY:
   case ISD::SUBCARRY:    Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
 
+  case ISD::SSAT:
+    Res = PromoteIntRes_SSAT(N);
+    break;
+
   case ISD::ATOMIC_LOAD:
     Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
 
@@ -534,6 +538,23 @@
   return SDValue(Res.getNode(), 1);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_SSAT(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  SDValue SatBits = N->getOperand(1);
+
+  // Saturating to at least the original width is a no-op, and the high bits
+  // of a promoted result are unspecified, so the promoted operand suffices.
+  if (auto *SatBitsNode = dyn_cast<ConstantSDNode>(SatBits))
+    if (SatBitsNode->getZExtValue() >= Op.getScalarValueSizeInBits())
+      return GetPromotedInteger(Op);
+
+  // Saturation on the wider type compares as signed, so the operand must be
+  // sign-extended; GetPromotedInteger leaves the high bits undefined.
+  SDValue LHS = SExtPromotedInteger(Op);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS,
+                     SatBits);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
   if (ResNo == 1)
     return PromoteIntRes_Overflow(N);
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -330,6 +330,7 @@
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_SSAT(SDNode *N);
 
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5763,6 +5763,12 @@
     setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
     return nullptr;
   }
+  case Intrinsic::ssaturate: {
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    setValue(&I, DAG.getNode(ISD::SSAT, sdl, Op1.getValueType(), Op1, Op2));
+    return nullptr;
+  }
   case Intrinsic::stacksave: {
     SDValue Op = getRoot();
     Res = DAG.getNode(
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -282,6 +282,8 @@
   case ISD::SRA_PARTS:                  return "sra_parts";
   case ISD::SRL_PARTS:                  return "srl_parts";
 
+  case ISD::SSAT:                       return "ssaturate";
+
   // Conversion operators.
   case ISD::SIGN_EXTEND:                return "sign_extend";
   case ISD::ZERO_EXTEND:                return "zero_extend";
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -608,6 +608,7 @@
     setOperationAction(ISD::UMIN, VT, Expand);
     setOperationAction(ISD::UMAX, VT, Expand);
     setOperationAction(ISD::ABS, VT, Expand);
+    setOperationAction(ISD::SSAT, VT, Expand);
 
     // Overflow operations default to expand
     setOperationAction(ISD::SADDO, VT, Expand);
Index: test/CodeGen/Generic/ssaturate.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Generic/ssaturate.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s
+
+declare i4 @llvm.ssaturate.i4 (i4, i32)
+declare i32 @llvm.ssaturate.i32 (i32, i32)
+
+define i32 @func() {
+entry:
+  %x = alloca i32, align 4
+
+  store i32 16, i32* %x, align 4
+  %val = load i32, i32* %x
+
+  %tmp = call i32 @llvm.ssaturate.i32( i32 %val, i32 4 )
+  ret i32 %tmp
+}
+
+define i4 @func2() {
+entry:
+  %x = alloca i4, align 1
+
+  store i4 1, i4* %x, align 1
+  %val = load i4, i4* %x
+
+  %tmp = call i4 @llvm.ssaturate.i4( i4 %val, i32 8 )
+  ret i4 %tmp
+}
+
+define i4 @func3(i4 %x) {
+entry:
+  %tmp = call i4 @llvm.ssaturate.i4( i4 %x, i32 2 )
+  ret i4 %tmp
+}
Index: test/CodeGen/X86/ssaturate.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ssaturate.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
+
+declare i4 @llvm.ssaturate.i4 (i4, i32)
+declare i32 @llvm.ssaturate.i32 (i32, i32)
+
+define i32 @func(i32 %x) {
+entry:
+; CHECK: func:
+; CHECK: cmpl $127, %edi
+; CHECK-NEXT: movl $127, %ecx
+; CHECK-NEXT: cmovlel %edi, %ecx
+; CHECK-NEXT: cmpl $-128, %edi
+; CHECK-NEXT: movl $-128, %eax
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: retq
+
+  %tmp = call i32 @llvm.ssaturate.i32( i32 %x, i32 8 )
+  ret i32 %tmp
+}
+
+define i4 @func2(i4 %x) {
+entry:
+; CHECK: func2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+
+  %tmp = call i4 @llvm.ssaturate.i4( i4 %x, i32 8 )
+  ret i4 %tmp
+}