Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -249,7 +249,8 @@
 void initializeSimpleInlinerPass(PassRegistry&);
 void initializeShadowStackGCLoweringPass(PassRegistry&);
 void initializeRegisterCoalescerPass(PassRegistry&);
-void initializeSingleLoopExtractorPass(PassRegistry&);
+void initializeSimplifySelectPass(PassRegistry &);
+void initializeSingleLoopExtractorPass(PassRegistry &);
 void initializeSinkingPass(PassRegistry&);
 void initializeSeparateConstOffsetFromGEPPass(PassRegistry &);
 void initializeSlotIndexesPass(PassRegistry&);
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -346,6 +346,12 @@
 
 //===----------------------------------------------------------------------===//
 //
+// SelectSimplification - Convert selects into ARM saturation intrinsics.
+//
+FunctionPass *createSelectSimplificationPass();
+
+//===----------------------------------------------------------------------===//
+//
 // ConstantHoisting - This pass prepares a function for expensive constants.
 //
 FunctionPass *createConstantHoistingPass();
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -55,9 +55,11 @@
            cl::init(true), cl::Hidden,
            cl::desc("Enable the new, experimental SROA pass"));
 
-static cl::opt<bool>
-RunLoopRerolling("reroll-loops", cl::Hidden,
-                 cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool> EnableSelectToIntrinsics("enable-select-to-intrinsics",
+                                              cl::init(true));
+
+static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden,
+                                      cl::desc("Run the loop rerolling pass"));
 
 static cl::opt<bool>
 RunLoadCombine("combine-loads", cl::init(false), cl::Hidden,
@@ -159,6 +161,13 @@
     FPM.add(createSROAPass());
   else
     FPM.add(createScalarReplAggregatesPass());
+  if (EnableSelectToIntrinsics) {
+    // ScalarReplAggregates exposes more "if"s that can be converted into
+    // selects, while EarlyCSE prevents some of them from being converted.
+    // So run CFGSimplification here, followed by SelectSimplification.
+    FPM.add(createCFGSimplificationPass());
+    FPM.add(createSelectSimplificationPass());
+  }
   FPM.add(createEarlyCSEPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -41,6 +41,7 @@
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp
+  SimplifySelectPass.cpp
   Sink.cpp
   StraightLineStrengthReduce.cpp
   StructurizeCFG.cpp
Index: lib/Transforms/Scalar/SimplifySelectPass.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/SimplifySelectPass.cpp
@@ -0,0 +1,459 @@
+//===- SimplifySelectPass.cpp - Select Simplification Pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting selects into machine-specific intrinsics
+// such as llvm.arm.usat or llvm.arm.ssat.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifyselect"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSelSimpl, "Number of selects simplified");
+
+namespace {
+struct SimplifySelect : public FunctionPass {
+  static char ID;
+  SimplifySelect() : FunctionPass(ID) {
+    initializeSimplifySelectPass(*PassRegistry::getPassRegistry());
+  }
+  virtual const char *getPassName() const {
+    return "Convert selects to intrinsics";
+  }
+  virtual bool runOnFunction(Function &F);
+};
+} // End anonymous namespace.
+
+char SimplifySelect::ID = 0;
+
+INITIALIZE_PASS(SimplifySelect, "simplifyselect", "Simplify the Select", false,
+                false)
+
+// Public interface to the SelectSimplification pass.
+FunctionPass *llvm::createSelectSimplificationPass() {
+  return new SimplifySelect();
+}
+
+// Check the LHS/RHS of CMP and the T/F values of SELECT. The pattern of CMP is
+// "ICMP LHS, RHS", where one of the operands must be a constant C, the other
+// must be non-constant, and the comparison is either <, <=, > or >=. C must
+// also be an operand of SELECT. If X is not NULL, it must be equal to the
+// non-constant operand of the ICMP. If SelectVal is not NULL, it must be equal
+// to the non-constant operand of the SELECT; if SelectVal is NULL, the two
+// non-constant operands of ICMP and SELECT should be equal.
+// If the above conditions are met, the function returns true, and C and X are
+// set to the constant and non-constant operands, respectively.
+static bool isMinMaxOfConstant(SelectInst *SI, bool &IsMin,
+                               Value *&X /*Var in cmp*/, ConstantInt *&C,
+                               Value *SelectVal, Value *&OpX /*Var in SI*/) {
+  if (!SI)
+    return false;
+
+  const ICmpInst *Cmp = dyn_cast<ICmpInst>(SI->getCondition());
+  if (!Cmp)
+    return false;
+  CmpInst::Predicate Pred = Cmp->getPredicate();
+  Value *LHS = Cmp->getOperand(0);
+  Value *RHS = Cmp->getOperand(1);
+
+  ConstantInt *CmpConst;
+  if ((CmpConst = dyn_cast<ConstantInt>(RHS)) && !isa<ConstantInt>(LHS)) {
+    if (X && X != LHS)
+      return false;
+    X = LHS;
+  } else if ((CmpConst = dyn_cast<ConstantInt>(LHS)) &&
+             !isa<ConstantInt>(RHS)) {
+    if (X && X != RHS)
+      return false;
+    X = RHS;
+  } else
+    return false;
+
+  Value *TrueValue = SI->getTrueValue();
+  Value *FalseValue = SI->getFalseValue();
+
+  // One and only one constant in the select.
+  ConstantInt *SIConst = dyn_cast<ConstantInt>(TrueValue);
+  Value *SIVar = FalseValue;
+  if (!SIConst) {
+    SIConst = dyn_cast<ConstantInt>(FalseValue);
+    SIVar = TrueValue;
+    if (!SIConst)
+      return false;
+  } else if (isa<ConstantInt>(FalseValue))
+    return false;
+
+  C = SIConst;
+
+  if (!OpX)
+    OpX = SIVar;
+
+  if (SelectVal && SelectVal != SIVar)
+    return false;
+
+  // When we match the constants of CMP and SI, if their signs differ we need
+  // to use ZExt. E.g. i8 -1 is equivalent to i16 255.
+  bool Zext = (SIConst->isNegative() && !CmpConst->isNegative()) ||
+              (!SIConst->isNegative() && CmpConst->isNegative());
+  int64_t SIConstV = Zext ? SIConst->getZExtValue() : SIConst->getSExtValue();
+  int64_t CmpConstV =
+      Zext ? CmpConst->getZExtValue() : CmpConst->getSExtValue();
+
+  // Check the constants of CMP and SELECT. They must be equal or off by 1.
+  bool IsSIConstGT = (SIConstV - CmpConstV) == 1;
+  bool IsSIConstLT = (CmpConstV - SIConstV) == 1;
+  if (SIConstV != CmpConstV && !IsSIConstGT && !IsSIConstLT)
+    return false;
+
+  if (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE ||
+      Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE) {
+    if (CmpConst == RHS) {
+      IsMin = (SIVar == TrueValue);
+      // X </<= C ? C : X => max, X </<= C ? X : C => min or
+      // X < C + 1 ? C : X => max, X < C + 1 ? X : C => min
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) &&
+               IsSIConstLT));
+    } else {
+      // C </<= X ? X : C => max, C </<= X ? C : X => min or
+      // C - 1 < X ? X : C => max, C - 1 < X ? C : X => min
+      IsMin = (SIVar == FalseValue);
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) &&
+               IsSIConstGT));
+    }
+    return false;
+  }
+
+  if (Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE ||
+      Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE) {
+    if (CmpConst == RHS) {
+      IsMin = (SIVar == FalseValue);
+      // X >/>= C ? C : X => min, X >/>= C ? X : C => max or
+      // X > C - 1 ? C : X => min, X > C - 1 ? X : C => max
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_UGT) &&
+               IsSIConstGT));
+    } else {
+      IsMin = (SIVar == TrueValue);
+      // C >/>= X ? X : C => min, C >/>= X ? C : X => max or
+      // C + 1 > X ? X : C => min, C + 1 > X ? C : X => max
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_UGT) &&
+               IsSIConstLT));
+    }
+  }
+  return false;
+}
+
+// Check if the CMP and SELECT are in the form of canonical clamping.
+static bool isCanonicalSaturate(SelectInst *SI, SelectInst *SI2, Value *&X,
+                                const ConstantInt *&Low,
+                                const ConstantInt *&High, Value *&OpX) {
+  bool IsMin;
+  ConstantInt *C = NULL;
+  // Check if it is of the form: X > C ? op(X) : C.
+  if (!isMinMaxOfConstant(SI, IsMin, X, C, NULL, OpX))
+    return false;
+  if (IsMin)
+    High = C; // min(C, X) ==> Clamp to [X, C].
+  else
+    Low = C; // max(C, X) ==> Clamp to [C, X].
+
+  // Next, see if SI and SI2 can be replaced with SSAT/USAT.
+
+  // Check if it is of the form: X > C ? C : SI.
+  if (!isMinMaxOfConstant(SI2, IsMin, X, C, SI, OpX))
+    return false;
+
+  if (IsMin && !High)
+    High = C;
+  else if (!IsMin && !Low)
+    Low = C;
+
+  if (!Low || !High)
+    return false;
+  return true;
+}
+
+// Check if OpX is a series of operations (e.g. trunc, cast) on X.
+// Currently, it only checks trunc instructions.
+static bool isFactorableOp(const Value *X, Value *OpX,
+                           SmallVectorImpl<Instruction *> &Ops) {
+  if (X == OpX)
+    return true;
+  Instruction *Inst = dyn_cast<Instruction>(OpX);
+  if (!Inst)
+    return false;
+  Ops.push_back(Inst);
+  if (dyn_cast<TruncInst>(Inst))
+    return isFactorableOp(X, Inst->getOperand(0), Ops);
+  return false;
+}
+
+// Check for special forms of clamping like:
+//    t = (unsigned) x <= MAX ? x : (x < 0 ? 0 : MAX); return op(t)
+// or t = (unsigned) x <= MAX ? x : (x > MAX ? MAX : 0); return op(t)
+// if MAX's bit width is less than that of x's type.
+// When op(C) = C (C is 0 or MAX), previous passes might transform it to:
+//    return (unsigned) x < MAX ? op(x) : (x<0 ? 0 : MAX);
+// This function checks whether the pattern above can be found or not.
+static bool isSpecialSaturate(const SelectInst *SI, SelectInst *SI2, Value *&X,
+                              const ConstantInt *&Low, const ConstantInt *&High,
+                              Value *&OpX) {
+  Low = High = NULL;
+  X = NULL;
+  const ICmpInst *Cmp1 = dyn_cast<ICmpInst>(SI->getCondition());
+  const ICmpInst *Cmp2 = dyn_cast<ICmpInst>(SI2->getCondition());
+
+  if (!Cmp1 || !Cmp2)
+    return false;
+
+  // Check for %cmp = icmp slt/sle %x, 0 (or slt %x, MAX)
+  //           %selv = select %cmp, 0, MAX
+  // or
+  //           %cmp = icmp sgt %x, 0 (or sgt/sge %x, MAX)
+  //           %selv = select %cmp, MAX, 0
+  CmpInst::Predicate Pred1 = Cmp1->getPredicate();
+  if (Pred1 != CmpInst::ICMP_SLT && Pred1 != CmpInst::ICMP_SLE &&
+      Pred1 != CmpInst::ICMP_SGT && Pred1 != CmpInst::ICMP_SGE)
+    return false;
+
+  ConstantInt *LHS = dyn_cast<ConstantInt>(Cmp1->getOperand(0));
+  ConstantInt *RHS = dyn_cast<ConstantInt>(Cmp1->getOperand(1));
+  // One operand must be constant, the other one must be non-constant.
+  if ((LHS && RHS) || (!LHS && !RHS))
+    return false;
+  // X is the non-constant operand.
+  X = RHS ? Cmp1->getOperand(0) : Cmp1->getOperand(1);
+  ConstantInt *CmpConst = LHS ? LHS : RHS;
+
+  // Check the select: either the false or the true value is zero, but not both.
+  const ConstantInt *TrueValue = dyn_cast<ConstantInt>(SI->getTrueValue()),
+                    *FalseValue = dyn_cast<ConstantInt>(SI->getFalseValue());
+  if (!TrueValue || !FalseValue)
+    return false;
+  if (!(FalseValue->isZero() ^ TrueValue->isZero()))
+    return false;
+  High = TrueValue->isZero() ? FalseValue : TrueValue;
+  Low = TrueValue->isZero() ? TrueValue : FalseValue;
+
+  // Check the combination of the cmp predicate and the operand order of the
+  // cmp and the select.
+  bool IsXPos; // If X is positive, the true value of the select should be High.
+  if (!CmpConst->isZero() && CmpConst != High)
+    // X is compared with neither 0 nor MAX.
+    return false;
+  else if (CmpConst->isZero()) {
+    // x </<= 0 || 0 >/>= x.
+    if (((Pred1 == CmpInst::ICMP_SLT || Pred1 == CmpInst::ICMP_SLE) && RHS) ||
+        ((Pred1 == CmpInst::ICMP_SGT || Pred1 == CmpInst::ICMP_SGE) && LHS))
+      IsXPos = false;
+    // x > 0 || 0 < x.
+    else if ((Pred1 == CmpInst::ICMP_SGT && RHS) ||
+             (Pred1 == CmpInst::ICMP_SLT && LHS))
+      IsXPos = true;
+    else
+      return false;
+  } else { // (CmpConst == High)
+    // x >/>= MAX || MAX </<= x.
+    if (((Pred1 == CmpInst::ICMP_SGT || Pred1 == CmpInst::ICMP_SGE) && RHS) ||
+        ((Pred1 == CmpInst::ICMP_SLT || Pred1 == CmpInst::ICMP_SLE) && LHS))
+      IsXPos = true;
+    // x < MAX || MAX > x.
+    else if ((Pred1 == CmpInst::ICMP_SLT && RHS) ||
+             (Pred1 == CmpInst::ICMP_SGT && LHS))
+      IsXPos = false;
+    else
+      return false;
+  }
+
+  // Check if the positions of TrueValue and FalseValue are correct.
+  if ((IsXPos ? TrueValue : FalseValue) != High)
+    return false;
+
+  IntegerType *Ty = dyn_cast<IntegerType>(X->getType());
+  if (!Ty || (Ty->getSignBit() & High->getZExtValue()))
+    return false;
+
+  // Check for %cmp = icmp ult %x, max+1 (or ule %x, max)
+  //           %selv1 = select %cmp, op(%x), %selv
+  CmpInst::Predicate Pred2 = Cmp2->getPredicate();
+  if (Pred2 == CmpInst::ICMP_ULT || Pred2 == CmpInst::ICMP_ULE ||
+      Pred2 == CmpInst::ICMP_UGT || Pred2 == CmpInst::ICMP_UGE) {
+    // X must be present and the other operand must be constant.
+    const ConstantInt *High2 = NULL;
+    if (X == Cmp2->getOperand(0))
+      High2 = dyn_cast<ConstantInt>(Cmp2->getOperand(1));
+    else if (X == Cmp2->getOperand(1))
+      High2 = dyn_cast<ConstantInt>(Cmp2->getOperand(0));
+    if (!High2)
+      return false;
+
+    bool isLTCmp = (Pred2 == CmpInst::ICMP_ULT || Pred2 == CmpInst::ICMP_ULE);
+    bool XIsLHS = X == Cmp2->getOperand(0);
+
+    // Check if X < High+1 or X <= High or High >= X or High+1 > X.
+    bool XIsLT = !(XIsLHS ^ isLTCmp);
+    bool NeedMinusOne = (XIsLHS && Pred2 == CmpInst::ICMP_ULT) ||
+                        (!XIsLHS && Pred2 == CmpInst::ICMP_UGT) ||
+                        (XIsLHS && Pred2 == CmpInst::ICMP_UGE) ||
+                        (!XIsLHS && Pred2 == CmpInst::ICMP_ULE);
+
+    // Check if the High value matches the one in the previous select.
+    // It's OK to compare ZExted values because they are both supposed to be
+    // positive values. We won't match two negative values of different types.
+    uint64_t High2Val = High2->getZExtValue();
+    if (NeedMinusOne)
+      --High2Val;
+    if (High2Val != High->getZExtValue())
+      return false;
+
+    Value *TrueValue = SI2->getTrueValue();
+    Value *FalseValue = SI2->getFalseValue();
+
+    if (XIsLT && FalseValue == SI) {
+      OpX = TrueValue;
+      return true;
+    }
+    if (!XIsLT && TrueValue == SI) {
+      OpX = FalseValue;
+      return true;
+    }
+  }
+  return false;
+}
+
+static Value *changeToSat(const ConstantInt *Low, const ConstantInt *High,
+                          Value *X, Instruction *InsertBefore, Module *M) {
+  Function *USatFunc = Intrinsic::getDeclaration(M, Intrinsic::arm_usat);
+  Function *SSatFunc = Intrinsic::getDeclaration(M, Intrinsic::arm_ssat);
+
+  Value *Args[2];
+  Type *VT = USatFunc->getFunctionType()->getParamType(0);
+
+  // If the data size of X is larger than that of usat/ssat's first parameter
+  // (the value to be saturated), then it is unsafe to saturate, because after
+  // truncating, the original out-of-range value might become in-range.
+  // If the data size of X is smaller, it will be extended and the result
+  // needs to be truncated.
+
+  if (X->getType()->getScalarSizeInBits() > VT->getScalarSizeInBits())
+    return NULL;
+  bool NeedsCasting =
+      X->getType()->getScalarSizeInBits() < VT->getScalarSizeInBits();
+
+  // Check if the range matches that of usat: [0, 2^n-1].
+  if (Low->isZero()) {
+    unsigned Amount = 0;
+    if (!High->isNegative() && (High->getValue() + 1).isPowerOf2())
+      Amount = (High->getValue() + 1).exactLogBase2();
+    else if (High->isMinusOne())
+      Amount = High->getBitWidth();
+    if (Amount >= 1 && Amount <= 32) {
+      Args[0] = !NeedsCasting ? X : CastInst::Create(Instruction::ZExt, X, VT,
+                                                     "", InsertBefore);
+      Args[1] = ConstantInt::get(VT, Amount);
+      Value *CI = CallInst::Create(USatFunc, Args, "", InsertBefore);
+      return !NeedsCasting ? CI
+                           : CastInst::Create(Instruction::Trunc, CI,
+                                              X->getType(), "", InsertBefore);
+    }
+  }
+  // Check if the range matches that of ssat: [-2^{n-1}, 2^{n-1} - 1].
+  if (Low->isNegative() && !High->isNegative() &&
+      Low->getValue().abs() == (High->getValue() + 1) &&
+      Low->getValue().abs().isPowerOf2()) {
+    unsigned Amount = Low->getValue().abs().exactLogBase2() + 1;
+    if (Amount >= 1 && Amount <= 32) {
+      Args[0] = !NeedsCasting ? X : CastInst::Create(Instruction::SExt, X, VT,
+                                                     "", InsertBefore);
+      // The SSAT intrinsic will increase it by 1.
+      Args[1] = ConstantInt::get(VT, Amount - 1);
+      Value *CI = CallInst::Create(SSatFunc, Args, "", InsertBefore);
+      return !NeedsCasting ?
CI
+                           : CastInst::Create(Instruction::Trunc, CI,
+                                              X->getType(), "", InsertBefore);
+    }
+  }
+  return NULL;
+}
+
+static bool changeToARMIntrinsics(BasicBlock &BB) {
+  bool Changed = false;
+
+  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+    SelectInst *SI = dyn_cast<SelectInst>(I);
+    if (!SI || !SI->hasOneUse())
+      continue;
+    SelectInst *SI2 = dyn_cast<SelectInst>(*SI->user_begin());
+    if (!SI2 || SI2->getParent() != &BB)
+      continue;
+
+    Value *X = NULL, *OpX = NULL;
+    const ConstantInt *Low = NULL, *High = NULL;
+
+    if (!isCanonicalSaturate(SI, SI2, X, Low, High, OpX) &&
+        !isSpecialSaturate(SI, SI2, X, Low, High, OpX))
+      continue;
+
+    SmallVector<Instruction *, 4> Ops;
+    if (!OpX || (X != OpX && !isFactorableOp(X, OpX, Ops)))
+      continue;
+
+    Instruction *InsertBefore = Ops.empty() ? SI2 : *Ops.rbegin();
+
+    Module *M = BB.getParent()->getParent();
+    Value *NewCall = changeToSat(Low, High, X, InsertBefore, M);
+
+    if (NewCall) {
+      DEBUG(dbgs() << "Replacing: " << *SI << " and " << *SI2 << "\n");
+      ++NumSelSimpl;
+      Changed = true;
+      // Apply the factorable ops to the saturated result.
+      if (Ops.size()) {
+        cast<Instruction>(OpX)->setOperand(0, NewCall);
+        NewCall = OpX;
+      }
+      SI2->replaceAllUsesWith(NewCall);
+      E = BB.end();
+    }
+  }
+  return Changed;
+}
+
+bool SimplifySelect::runOnFunction(Function &F) {
+  bool Changed = false;
+  llvm::Triple TargetTriple(F.getParent()->getTargetTriple());
+
+  StringRef Arch = TargetTriple.getArchName();
+  // Only ARMv6 or above, and Thumb, support usat/ssat.
+  if (Arch.startswith("armv6") || Arch.startswith("armv7") ||
+      Arch.startswith("thumb"))
+    for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ++BBIt)
+      Changed |= (changeToARMIntrinsics(*BBIt));
+
+  return Changed;
+}
Index: test/CodeGen/ARM/intrinsics-sat.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/intrinsics-sat.ll
@@ -0,0 +1,497 @@
+; RUN: opt < %s -O3 -mtriple=armv7-none-linux-gnueabi -enable-select-to-intrinsics -S | FileCheck %s
+; RUN: opt < %s -O3 -mtriple=armv7-none-linux-gnueabi -enable-select-to-intrinsics -S | llc | FileCheck --check-prefix=CHECK-LLC %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; return x < -64 ? -64 : (x > 63 ? 63 : x);
+define i32 @ssat1(i32 %x) nounwind readnone {
+;CHECK: @ssat1
+;CHECK-LLC: ssat1:
+entry:
+;CHECK: call i32 @llvm.arm.ssat(i32 %x, i32 6)
+;CHECK-NOT: cmp
+;CHECK-NOT: select
+;CHECK-LLC: ssat r{{[0-9]}}, #7
+  %cmp1 = icmp sgt i32 %x, 63
+  %sel1 = select i1 %cmp1, i32 63, i32 %x
+  %cmp = icmp slt i32 %x, -64
+  %sel2 = select i1 %cmp, i32 -64, i32 %sel1
+  ret i32 %sel2
+}
+
+; return x < -1 ? -1 : (x > 0 ? 0 : x); // Corner case
+define i32 @ssat2(i32 %x) nounwind readnone {
+;CHECK: @ssat2
+;CHECK-LLC: ssat2:
+entry:
+;CHECK: call i32 @llvm.arm.ssat(i32 %x, i32 0)
+;CHECK-NOT: cmp
+;CHECK-NOT: select
+;CHECK-LLC: ssat r{{[0-9]}}, #1
+  %cmp = icmp slt i32 %x, -1
+  %cmp1 = icmp sgt i32 %x, 0
+  %sel1 = select i1 %cmp1, i32 0, i32 %x
+  %sel3 = select i1 %cmp, i32 -1, i32 %sel1
+  ret i32 %sel3
+}
+
+; return x > 255 ? 255 : (x < 0 ?
0 : x); +define i32 @usat1(i32 %x) nounwind readnone { +; CHECK: @usat1 +; CHECK-LLC: usat1: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp sgt i32 %x, 255 + %cmp1 = icmp slt i32 %x, 0 + %sel1 = select i1 %cmp1, i32 0, i32 %x + %sel2 = select i1 %cmp, i32 255, i32 %sel1 + ret i32 %sel2 +} + +;return x < 0 ? 0 : (x > 1 ? 1 : x); //Corner case +define i32 @usat_one(i32 %x) nounwind readnone { +; CHECK: @usat_one +; CHECK-LLC: usat_one: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 1) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #1 + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 1 + %sel3 = select i1 %cmp1, i32 1, i32 %x + %sel8 = select i1 %cmp, i32 0, i32 %sel3 + ret i32 %sel8 +} + + +;return x < 0 ? 0 : (x > 31 ? 31 : x); +define i32 @usat2(i32 %x) nounwind readnone { +; CHECK: @usat2 +; CHECK-LLC: usat2: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 5) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #5 + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 31 + %sel3 = select i1 %cmp1, i32 31, i32 %x + %sel4 = select i1 %cmp, i32 0, i32 %sel3 + ret i32 %sel4 +} + +; return (unsigned)x < 256 ? (char)x : (x < 0 ? 0 : 255) +define i8 @usat3(i32 %a) nounwind readnone { +; CHECK: @usat3 +; CHECK-LLC: usat3: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %conv = trunc i32 %a to i8 + %cmp1 = icmp slt i32 %a, 0 + %selv = select i1 %cmp1, i8 0, i8 -1 + %selv1 = select i1 %cmp, i8 %conv, i8 %selv + ret i8 %selv1 +} + +; return (unsigned)x >= 256 ? (0 > x ? 0 : 255) : x +define i32 @usat4(i32 %a) nounwind readnone { +; CHECK: @usat4 +; CHECK-LLC: usat4: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp uge i32 %a, 256 + %cmp1 = icmp sgt i32 0, %a + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %selv, i32 %a + ret i32 %selv1 +} + +; return (unsigned)x <= 255 ? x: (x < 0 ? 0 : 255) +define i32 @usat5(i32 %a) nounwind readnone { +; CHECK: @usat5 +; CHECK-LLC: usat5: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ule i32 %a, 255 + %cmp1 = icmp sgt i32 0, %a + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x <= 256 ? x: (x < 0 ? 0 : 255) +define i32 @not_usat5(i32 %a) nounwind readnone { +;CHECK: not_usat5 +;CHECK-LLC: not_usat5: +entry: +;CHECK-NOT: @llvm.arm.usat +;CHECK-LLC-NOT: usat r{{[0-9]}} +;CHECK: cmp +;CHECK: select + %cmp = icmp ule i32 %a, 256 + %cmp1 = icmp sgt i32 0, %a + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x > 0 ? 255 : 0) +define i32 @usat6(i32 %a) nounwind readnone { +; CHECK: @usat6 +; CHECK-LLC: usat6: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sgt i32 %a, 0 + %selv = select i1 %cmp1, i32 255, i32 0 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x >= 0 ? 
255 : 0) +define i32 @not_usat6(i32 %a) nounwind readnone { +; CHECK: @not_usat6 +; CHECK-LLC: not_usat6: +entry: +;CHECK-NOT: @llvm.arm.usat +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: usat r{{[0-9]}} + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sge i32 %a, 0 + %selv = select i1 %cmp1, i32 255, i32 0 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x >= 255 ? 255 : 0) +define i32 @usat7(i32 %a) nounwind readnone { +; CHECK: @usat7 +; CHECK-LLC: usat7: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sge i32 %a, 255 + %selv = select i1 %cmp1, i32 255, i32 0 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x < 255 ? 0 : 255) +define i32 @usat8(i32 %a) nounwind readnone { +; CHECK: @usat8 +; CHECK-LLC: usat8: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp slt i32 %a, 255 + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} +; return (unsigned)x < 256 ? x: (x <= 255 ? 0 : 255) +define i32 @not_usat8(i32 %a) nounwind readnone { +; CHECK: @not_usat8 +; CHECK-LLC: not_usat8: +entry: +;CHECK-NOT: @llvm.arm.usat +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: usat r{{[0-9]}} + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sle i32 %a, 255 + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (a<0 ? 0 : (unsigned) a < 256 ? (char) a : 255) +define zeroext i8 @usat9(i32 %a) nounwind readnone { +; CHECK: @usat9 +; CHECK-LLC: usat9: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp slt i32 %a, 0 + %cmp1 = icmp ult i32 %a, 256 + %conv = trunc i32 %a to i8 + %selv = select i1 %cmp1, i8 %conv, i8 -1 + %selv1 = select i1 %cmp, i8 0, i8 %selv + ret i8 %selv1 +} + +; return ((x<0 ? 0 : (x > 255 ? 255 : x)) >> 2) << 5; } +define i32 @usat_shifts(i32 %x) nounwind { +;CHECK: @usat_shifts +;CHECK-LLC: usat_shifts: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end4 + +cond.false: ; preds = %entry + %1 = load i32, i32* %x.addr, align 4 + %cmp1 = icmp sgt i32 %1, 255 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.false + br label %cond.end + +cond.false3: ; preds = %cond.false + %2 = load i32, i32* %x.addr, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false3, %cond.true2 + %cond = phi i32 [ 255, %cond.true2 ], [ %2, %cond.false3 ] + br label %cond.end4 + +cond.end4: ; preds = %cond.end, %cond.true + %cond5 = phi i32 [ 0, %cond.true ], [ %cond, %cond.end ] + %shr = ashr i32 %cond5, 2 + %shl = shl i32 %shr, 5 + ret i32 %shl +} + +; Negative tests +; return x < 0 ? 3 : (x > 255 ? 255 : x); +define i32 @not_usat1(i32 %x) nounwind readnone { +;CHECK: @not_usat1 +;CHECK-LLC: not_usat1: +entry: +;CHECK-NOT: @llvm.arm. 
+;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 255 + %sel9 = select i1 %cmp1, i32 255, i32 %x + %sel10 = select i1 %cmp, i32 3, i32 %sel9 + ret i32 %sel10 +} + +; return x < 0 ? 0 : (x > 256 ? 256 : x); +define i32 @not_usat2(i32 %x) nounwind readnone { +;CHECK: @not_usat2 +;CHECK-LLC: not_usat2: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 256 + %sel11 = select i1 %cmp1, i32 256, i32 %x + %sel12 = select i1 %cmp, i32 0, i32 %sel11 + ret i32 %sel12 +} + +; return x < 0 ? 0 : (x > 255 ? x : 255); +define i32 @not_usat3(i32 %x) nounwind readnone { +;CHECK: @not_usat3 +;CHECK-LLC: not_usat3: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 255 + %sel13 = select i1 %cmp1, i32 %x, i32 255 + %sel14 = select i1 %cmp, i32 0, i32 %sel13 + ret i32 %sel14 +} + +; return x < 0 ? (x > 255 ? 255 : x) : 0; +define i32 @not_usat4(i32 %x) nounwind readnone { +;CHECK: @not_usat4 +;CHECK-LLC: not_usat4: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 255 + %sel15 = select i1 %cmp1, i32 255, i32 %x + %sel16 = select i1 %cmp, i32 %sel15, i32 0 + ret i32 %sel16 +} + +; return x > 0 ? 0 : (x < 0 ? 0 : x); +; This will be optimized to ret 0 by the select-of-select instcombine +define i32 @not_usat_zero(i32 %x) nounwind readnone { +;CHECK: @not_usat_zero +;CHECK-LLC: not_usat_zero: +entry: +; CHECK-NOT: @llvm.arm. +; CHECK-LLC-NOT: sat r + %cmp = icmp sgt i32 %x, 0 + %cmp1 = icmp slt i32 %x, 0 + %sel5 = select i1 %cmp1, i32 0, i32 %x + %sel6 = select i1 %cmp, i32 0, i32 %sel5 + ret i32 %sel6 +} + +; return x < -64 ? -64 : (x > 64 ? 64 : x); +define i32 @not_ssat1(i32 %x) nounwind readnone { +;CHECK: @not_ssat1 +;CHECK-LLC: not_ssat1 +entry: +; CHECK-NOT: @llvm.arm. +; CHECK: cmp +; CHECK: select +; CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, -64 + %cmp1 = icmp sgt i32 %x, 64 + %sel3 = select i1 %cmp1, i32 64, i32 %x + %sel4 = select i1 %cmp, i32 -64, i32 %sel3 + ret i32 %sel4 +} + +define i64 @not_ssat_i64(i64 %x) nounwind readnone { +;CHECK: @not_ssat_i64 +;CHECK-LLC: not_ssat_i64: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i64 %x, -64 + %cmp1 = icmp sgt i64 %x, 63 + %sel1 = select i1 %cmp1, i64 63, i64 %x + %sel2 = select i1 %cmp, i64 -64, i64 %sel1 + ret i64 %sel2 +} + +;return x < -64 ? -64 : (x > 63 ? 
63 : x); +define i8 @ssat_i8(i8 %x) nounwind readnone { +;CHECK: @ssat_i8 +;CHECK-LLC: ssat_i8: +entry: +;CHECK: sext i8 %x to i32 +;CHECK: call i32 @llvm.arm.ssat(i32 %{{.*}}, i32 6) +;CHECK: trunc i32 %{{.*}} to i8 +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: ssat r{{[0-9]}}, #7 + %cmp = icmp slt i8 %x, -64 + %cmp1 = icmp sgt i8 %x, 63 + %sel1 = select i1 %cmp1, i8 63, i8 %x + %sel2 = select i1 %cmp, i8 -64, i8 %sel1 + ret i8 %sel2 +} + +define i8 @usat_i8(i8 %x) nounwind readnone { +; CHECK: @usat_i8 +; CHECK-LLC: usat_i8: +entry: +;CHECK: zext i8 %x to i32 +;CHECK: call i32 @llvm.arm.usat(i32 %{{.*}}, i32 8) +;CHECK: trunc i32 %{{.*}} to i8 +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp sgt i8 %x, 255 + %cmp1 = icmp slt i8 %x, 0 + %sel1 = select i1 %cmp1, i8 0, i8 %x + %sel2 = select i1 %cmp, i8 255, i8 %sel1 + ret i8 %sel2 +} + +define i16 @trunc_to_i16(i32 %add) { +;CHECK-LABEL: @trunc_to_i16 +entry: +;CHECK: call i32 @llvm.arm.ssat(i32 %{{.*}}, i32 15) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: ssat r{{[0-9]}}, #16 + %cmp = icmp slt i32 %add, -32768 + %cmp2 = icmp sgt i32 %add, 32767 + %0 = trunc i32 %add to i16 + %phitmp = select i1 %cmp2, i16 32767, i16 %0 + %cond7 = select i1 %cmp, i16 -32768, i16 %phitmp + ret i16 %cond7 +} + +define i16 @trunc_to_i16_u(i32 %add) { +;CHECK-LABEL: @trunc_to_i16_u +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %{{.*}}, i32 16) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #16 + %cmp = icmp slt i32 %add, 0 + %cmp2 = icmp sgt i32 %add, 65535 + %0 = trunc i32 %add to i16 + %phitmp = select i1 %cmp2, i16 65535, i16 %0 + %cond7 = select i1 %cmp, i16 0, i16 %phitmp + ret i16 %cond7 +} + +; CHECK: @foo1 +; CHECK: @llvm.arm.usat(i32 {{%[0-9]}}, i32 8) +define void @foo1(i8* %Dst, i16* %Src) #0 { +entry: + %arrayidx = getelementptr inbounds i16, i16* %Src, i32 7 + %0 = load i16, i16* %arrayidx, align 2 + %cmp = icmp slt i16 %0, 0 + %cmp4 = icmp sgt i16 %0, 255 + %1 = trunc i16 %0 to i8 + %phitmp = select i1 %cmp4, i8 -1, i8 %1 + %selv = select i1 %cmp, i8 0, i8 %phitmp + %arrayidx13 = getelementptr inbounds i8, i8* %Dst, i32 15 + store i8 %selv, i8* %arrayidx13, align 1 + ret void +} + +; CHECK: @foo2 +; CHECK: @llvm.arm.ssat(i32 {{%[0-9]}}, i32 8) +define void @foo2(i8* %Dst, i16* %Src) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %Src, i32 7 + %0 = load i16, i16* %arrayidx, align 2 + %1 = sext i16 %0 to i32 + %cmp = icmp slt i32 %1, -256 + %cmp1 = icmp sgt i32 %1, 255 + %sel1 = select i1 %cmp1, i32 255, i32 %1 + %sel3 = select i1 %cmp, i32 -256, i32 %sel1 + %2 = trunc i32 %sel3 to i8 + %arrayidx13 = getelementptr inbounds i8, i8* %Dst, i32 15 + store i8 %2, i8* %arrayidx13, align 1 + ret void +}
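
Note (not part of the patch): a minimal before/after sketch of the canonical clamp pattern this pass targets, assuming an ARMv6-or-later/Thumb triple as required by runOnFunction. The function name @clamp_to_s7 and the value names are illustrative only; the expected result mirrors the ssat1 test above.

; Before: return x < -64 ? -64 : (x > 63 ? 63 : x);
define i32 @clamp_to_s7(i32 %x) {
entry:
  %cmp1 = icmp sgt i32 %x, 63
  %sel1 = select i1 %cmp1, i32 63, i32 %x      ; inner min(x, 63)
  %cmp2 = icmp slt i32 %x, -64
  %sel2 = select i1 %cmp2, i32 -64, i32 %sel1  ; outer max(-64, ...)
  ret i32 %sel2
}

; After running the pass (e.g. via opt -simplifyselect, or through -O3 with
; -enable-select-to-intrinsics), both selects are expected to collapse into
;   %sat = call i32 @llvm.arm.ssat(i32 %x, i32 6)
;   ret i32 %sat
; because [-64, 63] is [-2^6, 2^6-1]; llc then emits an "ssat rN, #7, rM"
; instruction, as checked by the CHECK-LLC lines in the test file.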