Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -256,6 +256,14 @@
     /// Same for multiplication.
     SMULO, UMULO,
 
+    /// SSAT(X, W) - Perform saturation on a signed value X to fit in W bits. If
+    /// X is greater than the largest signed value that can be represented in W
+    /// bits, the returned value is this largest signed value. If X is less than
+    /// the smallest signed value that can be represented in W bits, this
+    /// smallest signed value is returned instead. W must be a constant; if W is
+    /// at least the bit width of X, X is returned unchanged.
+    SSAT,
+
     /// Simple binary floating point operators.
     FADD, FSUB, FMUL, FDIV, FREM,
 
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -700,6 +700,12 @@
                          [LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem, IntrSpeculatable]>;
 
+//===------------------------- Fixed Point Intrinsics ---------------------===//
+//
+def int_ssaturate : Intrinsic<[llvm_anyint_ty],
+                              [LLVMMatchType<0>, llvm_i32_ty],
+                              [IntrNoMem, IntrSpeculatable]>;
+
 //===------------------------- Memory Use Markers -------------------------===//
 //
 def int_lifetime_start  : Intrinsic<[],
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1115,6 +1115,10 @@
     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
                                             Node->getValueType(0));
     break;
+  case ISD::SSAT:
+    // FIXME: Always expanded for now; should the target's action be consulted?
+    Action = TargetLowering::Expand;
+    break;
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -3461,6 +3465,43 @@
     }
     break;
   }
+  case ISD::SSAT: {
+    // Expand SSAT(X, W) by clamping X to the signed W-bit range:
+    //   select(X < Min, Min, select(X > Max, Max, X))
+    // The saturation width W has to be a constant.
+    SDValue SatBits = Node->getOperand(1);
+    auto *SatBitsNode = dyn_cast<ConstantSDNode>(SatBits);
+    if (!SatBitsNode)
+      report_fatal_error(
+          "Second argument of ssaturate intrinsic must be a constant integer");
+
+    SDValue Op1 = Node->getOperand(0);
+    unsigned NumSatBits = SatBitsNode->getZExtValue();
+    // Use the element width (identical to the full width for scalar types).
+    unsigned SrcBits = Op1.getScalarValueSizeInBits();
+    if (SrcBits <= NumSatBits) {
+      // Every value of the source type already fits in W bits.
+      Results.push_back(Op1);
+      break;
+    }
+
+    EVT Ty = Op1.getValueType();
+    auto MinVal = APInt::getSignedMinValue(NumSatBits).sext(SrcBits);
+    auto MaxVal = APInt::getSignedMaxValue(NumSatBits).sext(SrcBits);
+    auto ConstMinVal = DAG.getConstant(MinVal, dl, Ty);
+    auto ConstMaxVal = DAG.getConstant(MaxVal, dl, Ty);
+
+    // getSetCCResultType() expects the type of the compared operands, not
+    // the boolean type being asked for.
+    EVT BoolVT = getSetCCResultType(Ty);
+    SDValue UseMin = DAG.getSetCC(dl, BoolVT, Op1, ConstMinVal, ISD::SETLT);
+    SDValue UseMax = DAG.getSetCC(dl, BoolVT, Op1, ConstMaxVal, ISD::SETGT);
+    SDValue Result = DAG.getSelect(dl, Ty, UseMax, ConstMaxVal, Op1);
+    Result = DAG.getSelect(dl, Ty, UseMin, ConstMinVal, Result);
+
+    Results.push_back(Result);
+    break;
+  }
   case ISD::SADDO:
   case ISD::SSUBO: {
     SDValue LHS = Node->getOperand(0);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5763,6 +5763,13 @@
     setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ?
X : Y, Or));
     return nullptr;
   }
+
+  case Intrinsic::ssaturate: {
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    setValue(&I, DAG.getNode(ISD::SSAT, sdl, Op1.getValueType(), Op1, Op2));
+    return nullptr;
+  }
   case Intrinsic::stacksave: {
     SDValue Op = getRoot();
     Res = DAG.getNode(
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -280,6 +280,8 @@
   case ISD::SRA_PARTS:                  return "sra_parts";
   case ISD::SRL_PARTS:                  return "srl_parts";
 
+  case ISD::SSAT:                       return "ssaturate";
+
   // Conversion operators.
   case ISD::SIGN_EXTEND:                return "sign_extend";
   case ISD::ZERO_EXTEND:                return "zero_extend";
Index: lib/Transforms/CMakeLists.txt
===================================================================
--- lib/Transforms/CMakeLists.txt
+++ lib/Transforms/CMakeLists.txt
@@ -8,3 +8,4 @@
 add_subdirectory(Hello)
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
+add_subdirectory(FixedPoint)
Index: lib/Transforms/FixedPoint/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Transforms/FixedPoint/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_loadable_module( LLVMFixedPoint
+  SaturationPass.cpp
+
+  DEPENDS
+  intrinsics_gen
+  PLUGIN_TOOL
+  opt
+  )
Index: lib/Transforms/FixedPoint/SaturationPass.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/FixedPoint/SaturationPass.cpp
@@ -0,0 +1,79 @@
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// Lowers calls to the llvm.ssaturate intrinsic into explicit IR: two signed
+/// comparisons against the bounds of the requested bit width followed by two
+/// selects. A call whose bit width is at least the width of its value operand
+/// is replaced by that operand.
+struct SignedSaturationPass : public FunctionPass {
+  static char ID;
+  SignedSaturationPass() : FunctionPass(ID) {}
+
+  /// Lowers every ssaturate call in \p F. Returns true if at least one call
+  /// was rewritten. Reports a fatal error if the bit-width operand of a call
+  /// is not a constant integer.
+  bool runOnFunction(Function &F) override {
+    // Lowered calls are collected here and erased after the walk so that the
+    // instruction iterator is never pointed at a deleted instruction.
+    SmallVector<CallInst *, 8> ToLower;
+    for (auto &I : instructions(F)) {
+      auto *CI = dyn_cast<CallInst>(&I);
+      if (!CI)
+        continue;
+      Function *Fn = CI->getCalledFunction();
+      if (!Fn || Fn->getIntrinsicID() != Intrinsic::ssaturate)
+        continue;
+
+      Value *Src = CI->getArgOperand(0);
+      auto *SatBits = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+      if (!SatBits)
+        report_fatal_error("Second argument of ssaturate intrinsic must be "
+                           "a constant integer");
+
+      unsigned NumSatBits = SatBits->getZExtValue();
+      // Use the element width (identical to the full width for scalar types).
+      unsigned SrcBits = Src->getType()->getScalarSizeInBits();
+      ToLower.push_back(CI);
+      if (SrcBits <= NumSatBits) {
+        // Every value of the source type already fits. Keep scanning: later
+        // calls in the function still have to be lowered.
+        CI->replaceAllUsesWith(Src);
+        continue;
+      }
+
+      auto MinVal = APInt::getSignedMinValue(NumSatBits).sext(SrcBits);
+      auto MaxVal = APInt::getSignedMaxValue(NumSatBits).sext(SrcBits);
+      // Build the bounds in the type of Src so the compares are well typed.
+      Constant *ConstMinVal = ConstantInt::get(Src->getType(), MinVal);
+      Constant *ConstMaxVal = ConstantInt::get(Src->getType(), MaxVal);
+
+      // New instructions are inserted immediately before the call.
+      IRBuilder<> Builder(CI);
+      Value *UseMin = Builder.CreateICmpSLT(Src, ConstMinVal);
+      Value *UseMax = Builder.CreateICmpSGT(Src, ConstMaxVal);
+      Value *Result = Builder.CreateSelect(UseMax, ConstMaxVal, Src);
+      Result = Builder.CreateSelect(UseMin, ConstMinVal, Result);
+
+      CI->replaceAllUsesWith(Result);
+    }
+
+    for (auto *CI : ToLower)
+      CI->eraseFromParent();
+
+    return !ToLower.empty();
+  }
+};
+} // namespace
+
+char SignedSaturationPass::ID = 0;
+static RegisterPass<SignedSaturationPass>
+    SignedSaturation("saturate", "Signed Saturation Pass");
Index: test/Transforms/FixedPoint/saturation.ll
===================================================================
--- /dev/null
+++ test/Transforms/FixedPoint/saturation.ll
@@ -0,0 +1,55 @@
+; RUN: opt -load %llvmshlibdir/LLVMFixedPoint%shlibext -saturate -S -o - < %s | FileCheck %s -check-prefix=SATURATE
+; RUN: opt -load %llvmshlibdir/LLVMFixedPoint%shlibext -S -o - < %s | FileCheck %s -check-prefix=NO-SATURATE
+
+declare i4 @llvm.ssaturate.i4 (i4, i32)
+declare i32 @llvm.ssaturate.i32 (i32, i32)
+
+define i32 @func() {
+entry:
+  ; SATURATE: func
+  ; NO-SATURATE: func
+  %x = alloca i32, align 4
+
+  store i32 16, i32* %x, align 4
+  %val = load i32, i32* %x
+
+  ; SATURATE: [[USE_MIN:%[0-9]+]] = icmp slt i32 %val, -8
+  ; SATURATE-DAG: [[USE_MAX:%[0-9]+]] = icmp sgt i32 %val, 7
+  ; SATURATE-DAG: [[RESULT:%[0-9]+]] = select i1 [[USE_MAX]], i32 7, i32 %val
+  ; SATURATE-DAG: [[RESULT2:%[0-9]+]] = select i1 [[USE_MIN]], i32 -8, i32 [[RESULT]]
+  ; SATURATE: ret i32 [[RESULT2]]
+  ; NO-SATURATE: %tmp = call i32 @llvm.ssaturate.i32(i32 %val, i32 4)
+  ; NO-SATURATE-NEXT: ret i32 %tmp
+  %tmp = call i32 @llvm.ssaturate.i32( i32 %val, i32 4 )
+  ret i32 %tmp
+}
+
+define i4 @func2() {
+entry:
+  ; SATURATE: func2
+  ; NO-SATURATE: func2
+  %x = alloca i4, align 1
+
+  store i4 7, i4* %x, align 1
+  %val = load i4, i4* %x
+
+  ; SATURATE: %val = load i4, i4* %x
+  ; SATURATE-NEXT: ret i4 %val
+  ; NO-SATURATE: %tmp = call i4 @llvm.ssaturate.i4(i4 %val, i32 4)
+  ; NO-SATURATE-NEXT: ret i4 %tmp
+  %tmp = call i4 @llvm.ssaturate.i4( i4 %val, i32 4 )
+  ret i4 %tmp
+}
+
+; A call that always fits must not stop later calls from being lowered.
+define i32 @func3(i4 %a, i32 %b) {
+entry:
+  ; SATURATE: func3
+  ; SATURATE-NOT: call
+  ; SATURATE: icmp slt i32 %b, -8
+  ; SATURATE: ret i32
+  ; NO-SATURATE: func3
+  %fits = call i4 @llvm.ssaturate.i4( i4 %a, i32 4 )
+  %sat = call i32 @llvm.ssaturate.i32( i32 %b, i32 4 )
+  ret i32 %sat
+}