Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -249,7 +249,8 @@
 void initializeSimpleInlinerPass(PassRegistry&);
 void initializeShadowStackGCLoweringPass(PassRegistry&);
 void initializeRegisterCoalescerPass(PassRegistry&);
-void initializeSingleLoopExtractorPass(PassRegistry&);
+void initializeSimplifySelectPass(PassRegistry &);
+void initializeSingleLoopExtractorPass(PassRegistry &);
 void initializeSinkingPass(PassRegistry&);
 void initializeSeparateConstOffsetFromGEPPass(PassRegistry &);
 void initializeSlotIndexesPass(PassRegistry&);
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -346,6 +346,12 @@
 
 //===----------------------------------------------------------------------===//
 //
+// SelectSimplification - Convert selects into ARM saturation intrinsics.
+//
+FunctionPass *createSelectSimplificationPass();
+
+//===----------------------------------------------------------------------===//
+//
 // ConstantHoisting - This pass prepares a function for expensive constants.
 //
 FunctionPass *createConstantHoistingPass();
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -55,9 +55,11 @@
            cl::init(true), cl::Hidden,
            cl::desc("Enable the new, experimental SROA pass"));
 
-static cl::opt<bool>
-RunLoopRerolling("reroll-loops", cl::Hidden,
-                 cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool> EnableSelectToIntrinsics("enable-select-to-intrinsics",
+                                              cl::init(true));
+
+static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden,
+                                      cl::desc("Run the loop rerolling pass"));
 
 static cl::opt<bool>
 RunLoadCombine("combine-loads", cl::init(false), cl::Hidden,
@@ -159,6 +161,13 @@
     FPM.add(createSROAPass());
   else
     FPM.add(createScalarReplAggregatesPass());
+  if (EnableSelectToIntrinsics) {
+    // ScalarReplAggregates exposes more "if"s that can be converted into
+    // selects, while EarlyCSE prevents some of them from being converted.
+    // So run CFGSimplification here, followed by SelectSimplification.
+    FPM.add(createCFGSimplificationPass());
+    FPM.add(createSelectSimplificationPass());
+  }
   FPM.add(createEarlyCSEPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -41,6 +41,7 @@
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp
+  SimplifySelectPass.cpp
   Sink.cpp
   StraightLineStrengthReduce.cpp
   StructurizeCFG.cpp
Index: lib/Transforms/Scalar/SimplifySelectPass.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/SimplifySelectPass.cpp
@@ -0,0 +1,459 @@
+//===- SimplifySelectPass.cpp - Select Simplification Pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting selects into machine-specific intrinsics
+// such as llvm.arm.usat or llvm.arm.ssat.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifyselect"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSelSimpl, "Number of selects simplified");
+
+namespace {
+struct SimplifySelect : public FunctionPass {
+  static char ID;
+  SimplifySelect() : FunctionPass(ID) {
+    initializeSimplifySelectPass(*PassRegistry::getPassRegistry());
+  }
+  virtual const char *getPassName() const {
+    return "Convert selects to intrinsics";
+  }
+  virtual bool runOnFunction(Function &F);
+};
+} // End anonymous namespace.
+
+char SimplifySelect::ID = 0;
+
+INITIALIZE_PASS(SimplifySelect, "simplifyselect", "Simplify the Select", false,
+                false)
+
+// Public interface to the SelectSimplification pass.
+FunctionPass *llvm::createSelectSimplificationPass() {
+  return new SimplifySelect();
+}
+
+// Check the LHS/RHS of CMP and the T/F values of SELECT. The pattern of CMP is
+// "ICMP LHS, RHS", where one of the operands must be a constant C, the other
+// must be non-constant, and the comparison is either <, <=, > or >=. C must
+// also be an operand of SELECT. If X is not NULL, it must be equal to the
+// non-constant operand of the ICMP. If SelectVal is not NULL, it must be equal
+// to the non-constant operand of the SELECT; if SelectVal is NULL, the two
+// non-constant operands of ICMP and SELECT should be equal.
+// If the above conditions are met, the function returns true, and C and X are
+// set to the constant and non-constant operands, respectively.
+static bool isMinMaxOfConstant(SelectInst *SI, bool &IsMin,
+                               Value *&X /*Var in cmp*/, ConstantInt *&C,
+                               Value *SelectVal, Value *&OpX /*Var in SI*/) {
+  if (!SI)
+    return false;
+
+  const ICmpInst *Cmp = dyn_cast<ICmpInst>(SI->getCondition());
+  if (!Cmp)
+    return false;
+  CmpInst::Predicate Pred = Cmp->getPredicate();
+  Value *LHS = Cmp->getOperand(0);
+  Value *RHS = Cmp->getOperand(1);
+
+  ConstantInt *CmpConst;
+  if ((CmpConst = dyn_cast<ConstantInt>(RHS)) && !isa<ConstantInt>(LHS)) {
+    if (X && X != LHS)
+      return false;
+    X = LHS;
+  } else if ((CmpConst = dyn_cast<ConstantInt>(LHS)) &&
+             !isa<ConstantInt>(RHS)) {
+    if (X && X != RHS)
+      return false;
+    X = RHS;
+  } else
+    return false;
+
+  Value *TrueValue = SI->getTrueValue();
+  Value *FalseValue = SI->getFalseValue();
+
+  // One and only one constant in the select.
+  ConstantInt *SIConst = dyn_cast<ConstantInt>(TrueValue);
+  Value *SIVar = FalseValue;
+  if (!SIConst) {
+    SIConst = dyn_cast<ConstantInt>(FalseValue);
+    SIVar = TrueValue;
+    if (!SIConst)
+      return false;
+  } else if (isa<ConstantInt>(FalseValue))
+    return false;
+
+  C = SIConst;
+
+  if (!OpX)
+    OpX = SIVar;
+
+  if (SelectVal && SelectVal != SIVar)
+    return false;
+
+  // When we match the constants of CMP and SI, if their signs differ we need
+  // to use ZExt. E.g. i8 -1 is equivalent to i16 255.
+  bool Zext = (SIConst->isNegative() && !CmpConst->isNegative()) ||
+              (!SIConst->isNegative() && CmpConst->isNegative());
+  int64_t SIConstV = Zext ? SIConst->getZExtValue() : SIConst->getSExtValue();
+  int64_t CmpConstV =
+      Zext ? CmpConst->getZExtValue() : CmpConst->getSExtValue();
+
+  // Check the constants of CMP and SELECT. They must be equal or off by 1.
+  bool IsSIConstGT = (SIConstV - CmpConstV) == 1;
+  bool IsSIConstLT = (CmpConstV - SIConstV) == 1;
+  if (SIConstV != CmpConstV && !IsSIConstGT && !IsSIConstLT)
+    return false;
+
+  if (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE ||
+      Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE) {
+    if (CmpConst == RHS) {
+      IsMin = (SIVar == TrueValue);
+      // X </<= C ? C : X => max, X </<= C ? X : C => min or
+      // X < C + 1 ? C : X => max, X < C + 1 ? X : C => min
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) &&
+               IsSIConstLT));
+    } else {
+      // C </<= X ? X : C => max, C </<= X ? C : X => min or
+      // C - 1 < X ? X : C => max, C - 1 < X ? C : X => min
+      IsMin = (SIVar == FalseValue);
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) &&
+               IsSIConstGT));
+    }
+    return false;
+  }
+
+  if (Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE ||
+      Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE) {
+    if (CmpConst == RHS) {
+      IsMin = (SIVar == FalseValue);
+      // X >/>= C ? C : X => min, X >/>= C ? X : C => max or
+      // X > C - 1 ? C : X => min, X > C - 1 ? X : C => max
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_UGT) &&
+               IsSIConstGT));
+    } else {
+      IsMin = (SIVar == TrueValue);
+      // C >/>= X ? X : C => min, C >/>= X ? C : X => max or
+      // C + 1 > X ? X : C => min, C + 1 > X ? C : X => max
+      return (CmpConstV == SIConstV ||
+              ((Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_UGT) &&
+               IsSIConstLT));
+    }
+  }
+  return false;
+}
+
+// Check if the CMP and SELECT are in the form of canonical clamping.
+static bool isCanonicalSaturate(SelectInst *SI, SelectInst *SI2, Value *&X,
+                                const ConstantInt *&Low,
+                                const ConstantInt *&High, Value *&OpX) {
+  bool IsMin;
+  ConstantInt *C = NULL;
+  // Check if it is of the form: X > C ? op(X) : C.
+  if (!isMinMaxOfConstant(SI, IsMin, X, C, NULL, OpX))
+    return false;
+  if (IsMin)
+    High = C; // min(C, X) ==> Clamp to [X, C].
+  else
+    Low = C; // max(C, X) ==> Clamp to [C, X].
+
+  // Next, see if SI and SI2 can be replaced with SSAT/USAT.
+
+  // Check if it is of the form: X > C ? C : SI.
+  if (!isMinMaxOfConstant(SI2, IsMin, X, C, SI, OpX))
+    return false;
+
+  if (IsMin && !High)
+    High = C;
+  else if (!IsMin && !Low)
+    Low = C;
+
+  if (!Low || !High)
+    return false;
+  return true;
+}
+
+// Check if OpX is a series of operations (e.g. trunc, cast) on X.
+// Currently, it only checks trunc instructions.
+static bool isFactorableOp(const Value *X, Value *OpX,
+                           SmallVectorImpl<Instruction *> &Ops) {
+  if (X == OpX)
+    return true;
+  Instruction *Inst = dyn_cast<Instruction>(OpX);
+  if (!Inst)
+    return false;
+  Ops.push_back(Inst);
+  if (dyn_cast<TruncInst>(Inst))
+    return isFactorableOp(X, Inst->getOperand(0), Ops);
+  return false;
+}
+
+// Check for special forms of clamping like:
+//    t = (unsigned) x <= MAX ? x : (x < 0 ? 0 : MAX); return op(t)
+// or t = (unsigned) x <= MAX ? x : (x > MAX ? MAX : 0); return op(t)
+// if MAX's bit width is less than that of x's type.
+// When op(C) = C (C is 0 or MAX), previous passes might transform it to:
+//    return (unsigned) x < MAX ? op(x) : (x<0 ? 0 : MAX);
+// This function checks whether the pattern above can be found or not.
+static bool isSpecialSaturate(const SelectInst *SI, SelectInst *SI2, Value *&X,
+                              const ConstantInt *&Low, const ConstantInt *&High,
+                              Value *&OpX) {
+  Low = High = NULL;
+  X = NULL;
+  const ICmpInst *Cmp1 = dyn_cast<ICmpInst>(SI->getCondition());
+  const ICmpInst *Cmp2 = dyn_cast<ICmpInst>(SI2->getCondition());
+
+  if (!Cmp1 || !Cmp2)
+    return false;
+
+  // Check for %cmp = icmp slt/sle %x, 0 (or slt %x, MAX)
+  //           %selv = select %cmp, 0, MAX
+  // or
+  //           %cmp = icmp sgt %x, 0 (or sgt/sge %x, MAX)
+  //           %selv = select %cmp, MAX, 0
+  CmpInst::Predicate Pred1 = Cmp1->getPredicate();
+  if (Pred1 != CmpInst::ICMP_SLT && Pred1 != CmpInst::ICMP_SLE &&
+      Pred1 != CmpInst::ICMP_SGT && Pred1 != CmpInst::ICMP_SGE)
+    return false;
+
+  ConstantInt *LHS = dyn_cast<ConstantInt>(Cmp1->getOperand(0));
+  ConstantInt *RHS = dyn_cast<ConstantInt>(Cmp1->getOperand(1));
+  // One operand must be constant, the other one must be non-constant.
+  if ((LHS && RHS) || (!LHS && !RHS))
+    return false;
+  // X is the non-constant operand.
+  X = RHS ? Cmp1->getOperand(0) : Cmp1->getOperand(1);
+  ConstantInt *CmpConst = LHS ? LHS : RHS;
+
+  // Check the select: either the false or the true value is zero, but not both.
+  const ConstantInt *TrueValue = dyn_cast<ConstantInt>(SI->getTrueValue()),
+                    *FalseValue = dyn_cast<ConstantInt>(SI->getFalseValue());
+  if (!TrueValue || !FalseValue)
+    return false;
+  if (!(FalseValue->isZero() ^ TrueValue->isZero()))
+    return false;
+  High = TrueValue->isZero() ? FalseValue : TrueValue;
+  Low = TrueValue->isZero() ? TrueValue : FalseValue;
+
+  // Check the combination of the cmp predicate and the operand order of the
+  // cmp and the select.
+  bool IsXPos; // If X is positive, the true value of the select should be High.
+  if (!CmpConst->isZero() && CmpConst != High)
+    // X is compared with neither 0 nor MAX.
+    return false;
+  else if (CmpConst->isZero()) {
+    // x </<= 0 || 0 >/>= x.
+    if (((Pred1 == CmpInst::ICMP_SLT || Pred1 == CmpInst::ICMP_SLE) && RHS) ||
+        ((Pred1 == CmpInst::ICMP_SGT || Pred1 == CmpInst::ICMP_SGE) && LHS))
+      IsXPos = false;
+    // x > 0 || 0 < x.
+    else if ((Pred1 == CmpInst::ICMP_SGT && RHS) ||
+             (Pred1 == CmpInst::ICMP_SLT && LHS))
+      IsXPos = true;
+    else
+      return false;
+  } else { // (CmpConst == High)
+    // x >/>= MAX || MAX </<= x.
+    if (((Pred1 == CmpInst::ICMP_SGT || Pred1 == CmpInst::ICMP_SGE) && RHS) ||
+        ((Pred1 == CmpInst::ICMP_SLT || Pred1 == CmpInst::ICMP_SLE) && LHS))
+      IsXPos = true;
+    // x < MAX || MAX > x.
+    else if ((Pred1 == CmpInst::ICMP_SLT && RHS) ||
+             (Pred1 == CmpInst::ICMP_SGT && LHS))
+      IsXPos = false;
+    else
+      return false;
+  }
+
+  // Check if the positions of TrueValue and FalseValue are correct.
+  if ((IsXPos ? TrueValue : FalseValue) != High)
+    return false;
+
+  IntegerType *Ty = dyn_cast<IntegerType>(X->getType());
+  if (!Ty || (Ty->getSignBit() & High->getZExtValue()))
+    return false;
+
+  // Check for %cmp = icmp ult %x, max+1 (or ule %x, max)
+  //           %selv1 = select %cmp, op(%x), %selv
+  CmpInst::Predicate Pred2 = Cmp2->getPredicate();
+  if (Pred2 == CmpInst::ICMP_ULT || Pred2 == CmpInst::ICMP_ULE ||
+      Pred2 == CmpInst::ICMP_UGT || Pred2 == CmpInst::ICMP_UGE) {
+    // X must be present and the other operand must be constant.
+    const ConstantInt *High2 = NULL;
+    if (X == Cmp2->getOperand(0))
+      High2 = dyn_cast<ConstantInt>(Cmp2->getOperand(1));
+    else if (X == Cmp2->getOperand(1))
+      High2 = dyn_cast<ConstantInt>(Cmp2->getOperand(0));
+    if (!High2)
+      return false;
+
+    bool isLTCmp = (Pred2 == CmpInst::ICMP_ULT || Pred2 == CmpInst::ICMP_ULE);
+    bool XIsLHS = X == Cmp2->getOperand(0);
+
+    // Check if X < High+1 or X <= High or High >= X or High+1 > X.
+    bool XIsLT = !(XIsLHS ^ isLTCmp);
+    bool NeedMinusOne = (XIsLHS && Pred2 == CmpInst::ICMP_ULT) ||
+                        (!XIsLHS && Pred2 == CmpInst::ICMP_UGT) ||
+                        (XIsLHS && Pred2 == CmpInst::ICMP_UGE) ||
+                        (!XIsLHS && Pred2 == CmpInst::ICMP_ULE);
+
+    // Check if the High value matches the one in the previous select.
+    // It's OK to compare ZExted values because they are both supposed to be
+    // positive values. We won't match two negative values of different types.
+    uint64_t High2Val = High2->getZExtValue();
+    if (NeedMinusOne)
+      --High2Val;
+    if (High2Val != High->getZExtValue())
+      return false;
+
+    Value *TrueValue = SI2->getTrueValue();
+    Value *FalseValue = SI2->getFalseValue();
+
+    if (XIsLT && FalseValue == SI) {
+      OpX = TrueValue;
+      return true;
+    }
+    if (!XIsLT && TrueValue == SI) {
+      OpX = FalseValue;
+      return true;
+    }
+  }
+  return false;
+}
+
+static Value *changeToSat(const ConstantInt *Low, const ConstantInt *High,
+                          Value *X, Instruction *InsertBefore, Module *M) {
+  Function *USatFunc = Intrinsic::getDeclaration(M, Intrinsic::arm_usat);
+  Function *SSatFunc = Intrinsic::getDeclaration(M, Intrinsic::arm_ssat);
+
+  Value *Args[2];
+  Type *VT = USatFunc->getFunctionType()->getParamType(0);
+
+  // If the data size of X is larger than that of usat/ssat's first parameter
+  // (the value to be saturated), then it is unsafe to saturate, because after
+  // truncating, the original out-of-range value might become in-range.
+  // If the data size of X is smaller, it will be extended and the result
+  // needs to be truncated.
+
+  if (X->getType()->getScalarSizeInBits() > VT->getScalarSizeInBits())
+    return NULL;
+  bool NeedsCasting =
+      X->getType()->getScalarSizeInBits() < VT->getScalarSizeInBits();
+
+  // Check if the range matches that of usat: [0, 2^n-1].
+  if (Low->isZero()) {
+    unsigned Amount = 0;
+    if (!High->isNegative() && (High->getValue() + 1).isPowerOf2())
+      Amount = (High->getValue() + 1).exactLogBase2();
+    else if (High->isMinusOne())
+      Amount = High->getBitWidth();
+    if (Amount >= 1 && Amount <= 32) {
+      Args[0] = !NeedsCasting ? X : CastInst::Create(Instruction::ZExt, X, VT,
+                                                     "", InsertBefore);
+      Args[1] = ConstantInt::get(VT, Amount);
+      Value *CI = CallInst::Create(USatFunc, Args, "", InsertBefore);
+      return !NeedsCasting ? CI
+                           : CastInst::Create(Instruction::Trunc, CI,
+                                              X->getType(), "", InsertBefore);
+    }
+  }
+  // Check if the range matches that of ssat: [-2^{n-1}, 2^{n-1} - 1].
+  if (Low->isNegative() && !High->isNegative() &&
+      Low->getValue().abs() == (High->getValue() + 1) &&
+      Low->getValue().abs().isPowerOf2()) {
+    unsigned Amount = Low->getValue().abs().exactLogBase2() + 1;
+    if (Amount >= 1 && Amount <= 32) {
+      Args[0] = !NeedsCasting ? X : CastInst::Create(Instruction::SExt, X, VT,
+                                                     "", InsertBefore);
+      // The SSAT intrinsic will increase it by 1.
+      Args[1] = ConstantInt::get(VT, Amount - 1);
+      Value *CI = CallInst::Create(SSatFunc, Args, "", InsertBefore);
+      return !NeedsCasting ?
CI
+                           : CastInst::Create(Instruction::Trunc, CI,
+                                              X->getType(), "", InsertBefore);
+    }
+  }
+  return NULL;
+}
+
+static bool changeToARMIntrinsics(BasicBlock &BB) {
+  bool Changed = false;
+
+  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+    SelectInst *SI = dyn_cast<SelectInst>(I);
+    if (!SI || !SI->hasOneUse())
+      continue;
+    SelectInst *SI2 = dyn_cast<SelectInst>(*SI->user_begin());
+    if (!SI2 || SI2->getParent() != &BB)
+      continue;
+
+    Value *X = NULL, *OpX = NULL;
+    const ConstantInt *Low = NULL, *High = NULL;
+
+    if (!isCanonicalSaturate(SI, SI2, X, Low, High, OpX) &&
+        !isSpecialSaturate(SI, SI2, X, Low, High, OpX))
+      continue;
+
+    SmallVector<Instruction *, 4> Ops;
+    if (!OpX || (X != OpX && !isFactorableOp(X, OpX, Ops)))
+      continue;
+
+    Instruction *InsertBefore = Ops.empty() ? SI2 : *Ops.rbegin();
+
+    Module *M = BB.getParent()->getParent();
+    Value *NewCall = changeToSat(Low, High, X, InsertBefore, M);
+
+    if (NewCall) {
+      DEBUG(dbgs() << "Replacing: " << *SI << " and " << *SI2 << "\n");
+      ++NumSelSimpl;
+      Changed = true;
+      // Apply the factorable ops to the saturated result.
+      if (Ops.size()) {
+        cast<Instruction>(OpX)->setOperand(0, NewCall);
+        NewCall = OpX;
+      }
+      SI2->replaceAllUsesWith(NewCall);
+      E = BB.end();
+    }
+  }
+  return Changed;
+}
+
+bool SimplifySelect::runOnFunction(Function &F) {
+  bool Changed = false;
+  llvm::Triple TargetTriple(F.getParent()->getTargetTriple());
+
+  StringRef Arch = TargetTriple.getArchName();
+  // Only ARMv6 or above, and Thumb, support usat/ssat.
+  if (Arch.startswith("armv6") || Arch.startswith("armv7") ||
+      Arch.startswith("thumb"))
+    for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ++BBIt)
+      Changed |= (changeToARMIntrinsics(*BBIt));
+
+  return Changed;
+}
Index: test/CodeGen/ARM/intrinsics-sat.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/intrinsics-sat.ll
@@ -0,0 +1,497 @@
+; RUN: opt < %s -O3 -mtriple=armv7-none-linux-gnueabi -enable-select-to-intrinsics -S | FileCheck %s
+; RUN: opt < %s -O3 -mtriple=armv7-none-linux-gnueabi -enable-select-to-intrinsics -S | llc | FileCheck --check-prefix=CHECK-LLC %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; return x < -64 ? -64 : (x > 63 ? 63 : x);
+define i32 @ssat1(i32 %x) nounwind readnone {
+;CHECK: @ssat1
+;CHECK-LLC: ssat1:
+entry:
+;CHECK: call i32 @llvm.arm.ssat(i32 %x, i32 6)
+;CHECK-NOT: cmp
+;CHECK-NOT: select
+;CHECK-LLC: ssat r{{[0-9]}}, #7
+  %cmp1 = icmp sgt i32 %x, 63
+  %sel1 = select i1 %cmp1, i32 63, i32 %x
+  %cmp = icmp slt i32 %x, -64
+  %sel2 = select i1 %cmp, i32 -64, i32 %sel1
+  ret i32 %sel2
+}
+
+; return x < -1 ? -1 : (x > 0 ? 0 : x); // Corner case
+define i32 @ssat2(i32 %x) nounwind readnone {
+;CHECK: @ssat2
+;CHECK-LLC: ssat2:
+entry:
+;CHECK: call i32 @llvm.arm.ssat(i32 %x, i32 0)
+;CHECK-NOT: cmp
+;CHECK-NOT: select
+;CHECK-LLC: ssat r{{[0-9]}}, #1
+  %cmp = icmp slt i32 %x, -1
+  %cmp1 = icmp sgt i32 %x, 0
+  %sel1 = select i1 %cmp1, i32 0, i32 %x
+  %sel3 = select i1 %cmp, i32 -1, i32 %sel1
+  ret i32 %sel3
+}
+
+; return x > 255 ? 255 : (x < 0 ?
0 : x); +define i32 @usat1(i32 %x) nounwind readnone { +; CHECK: @usat1 +; CHECK-LLC: usat1: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp sgt i32 %x, 255 + %cmp1 = icmp slt i32 %x, 0 + %sel1 = select i1 %cmp1, i32 0, i32 %x + %sel2 = select i1 %cmp, i32 255, i32 %sel1 + ret i32 %sel2 +} + +;return x < 0 ? 0 : (x > 1 ? 1 : x); //Corner case +define i32 @usat_one(i32 %x) nounwind readnone { +; CHECK: @usat_one +; CHECK-LLC: usat_one: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 1) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #1 + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 1 + %sel3 = select i1 %cmp1, i32 1, i32 %x + %sel8 = select i1 %cmp, i32 0, i32 %sel3 + ret i32 %sel8 +} + + +;return x < 0 ? 0 : (x > 31 ? 31 : x); +define i32 @usat2(i32 %x) nounwind readnone { +; CHECK: @usat2 +; CHECK-LLC: usat2: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 5) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #5 + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 31 + %sel3 = select i1 %cmp1, i32 31, i32 %x + %sel4 = select i1 %cmp, i32 0, i32 %sel3 + ret i32 %sel4 +} + +; return (unsigned)x < 256 ? (char)x : (x < 0 ? 0 : 255) +define i8 @usat3(i32 %a) nounwind readnone { +; CHECK: @usat3 +; CHECK-LLC: usat3: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %conv = trunc i32 %a to i8 + %cmp1 = icmp slt i32 %a, 0 + %selv = select i1 %cmp1, i8 0, i8 -1 + %selv1 = select i1 %cmp, i8 %conv, i8 %selv + ret i8 %selv1 +} + +; return (unsigned)x >= 256 ? (0 > x ? 0 : 255) : x +define i32 @usat4(i32 %a) nounwind readnone { +; CHECK: @usat4 +; CHECK-LLC: usat4: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp uge i32 %a, 256 + %cmp1 = icmp sgt i32 0, %a + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %selv, i32 %a + ret i32 %selv1 +} + +; return (unsigned)x <= 255 ? x: (x < 0 ? 0 : 255) +define i32 @usat5(i32 %a) nounwind readnone { +; CHECK: @usat5 +; CHECK-LLC: usat5: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ule i32 %a, 255 + %cmp1 = icmp sgt i32 0, %a + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x <= 256 ? x: (x < 0 ? 0 : 255) +define i32 @not_usat5(i32 %a) nounwind readnone { +;CHECK: not_usat5 +;CHECK-LLC: not_usat5: +entry: +;CHECK-NOT: @llvm.arm.usat +;CHECK-LLC-NOT: usat r{{[0-9]}} +;CHECK: cmp +;CHECK: select + %cmp = icmp ule i32 %a, 256 + %cmp1 = icmp sgt i32 0, %a + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x > 0 ? 255 : 0) +define i32 @usat6(i32 %a) nounwind readnone { +; CHECK: @usat6 +; CHECK-LLC: usat6: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sgt i32 %a, 0 + %selv = select i1 %cmp1, i32 255, i32 0 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x >= 0 ? 
255 : 0) +define i32 @not_usat6(i32 %a) nounwind readnone { +; CHECK: @not_usat6 +; CHECK-LLC: not_usat6: +entry: +;CHECK-NOT: @llvm.arm.usat +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: usat r{{[0-9]}} + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sge i32 %a, 0 + %selv = select i1 %cmp1, i32 255, i32 0 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x >= 255 ? 255 : 0) +define i32 @usat7(i32 %a) nounwind readnone { +; CHECK: @usat7 +; CHECK-LLC: usat7: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sge i32 %a, 255 + %selv = select i1 %cmp1, i32 255, i32 0 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (unsigned)x < 256 ? x: (x < 255 ? 0 : 255) +define i32 @usat8(i32 %a) nounwind readnone { +; CHECK: @usat8 +; CHECK-LLC: usat8: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp slt i32 %a, 255 + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} +; return (unsigned)x < 256 ? x: (x <= 255 ? 0 : 255) +define i32 @not_usat8(i32 %a) nounwind readnone { +; CHECK: @not_usat8 +; CHECK-LLC: not_usat8: +entry: +;CHECK-NOT: @llvm.arm.usat +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: usat r{{[0-9]}} + %cmp = icmp ult i32 %a, 256 + %cmp1 = icmp sle i32 %a, 255 + %selv = select i1 %cmp1, i32 0, i32 255 + %selv1 = select i1 %cmp, i32 %a, i32 %selv + ret i32 %selv1 +} + +; return (a<0 ? 0 : (unsigned) a < 256 ? (char) a : 255) +define zeroext i8 @usat9(i32 %a) nounwind readnone { +; CHECK: @usat9 +; CHECK-LLC: usat9: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %a, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp slt i32 %a, 0 + %cmp1 = icmp ult i32 %a, 256 + %conv = trunc i32 %a to i8 + %selv = select i1 %cmp1, i8 %conv, i8 -1 + %selv1 = select i1 %cmp, i8 0, i8 %selv + ret i8 %selv1 +} + +; return ((x<0 ? 0 : (x > 255 ? 255 : x)) >> 2) << 5; } +define i32 @usat_shifts(i32 %x) nounwind { +;CHECK: @usat_shifts +;CHECK-LLC: usat_shifts: +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %x, i32 8) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %cmp = icmp slt i32 %0, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end4 + +cond.false: ; preds = %entry + %1 = load i32, i32* %x.addr, align 4 + %cmp1 = icmp sgt i32 %1, 255 + br i1 %cmp1, label %cond.true2, label %cond.false3 + +cond.true2: ; preds = %cond.false + br label %cond.end + +cond.false3: ; preds = %cond.false + %2 = load i32, i32* %x.addr, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false3, %cond.true2 + %cond = phi i32 [ 255, %cond.true2 ], [ %2, %cond.false3 ] + br label %cond.end4 + +cond.end4: ; preds = %cond.end, %cond.true + %cond5 = phi i32 [ 0, %cond.true ], [ %cond, %cond.end ] + %shr = ashr i32 %cond5, 2 + %shl = shl i32 %shr, 5 + ret i32 %shl +} + +; Negative tests +; return x < 0 ? 3 : (x > 255 ? 255 : x); +define i32 @not_usat1(i32 %x) nounwind readnone { +;CHECK: @not_usat1 +;CHECK-LLC: not_usat1: +entry: +;CHECK-NOT: @llvm.arm. 
+;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 255 + %sel9 = select i1 %cmp1, i32 255, i32 %x + %sel10 = select i1 %cmp, i32 3, i32 %sel9 + ret i32 %sel10 +} + +; return x < 0 ? 0 : (x > 256 ? 256 : x); +define i32 @not_usat2(i32 %x) nounwind readnone { +;CHECK: @not_usat2 +;CHECK-LLC: not_usat2: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 256 + %sel11 = select i1 %cmp1, i32 256, i32 %x + %sel12 = select i1 %cmp, i32 0, i32 %sel11 + ret i32 %sel12 +} + +; return x < 0 ? 0 : (x > 255 ? x : 255); +define i32 @not_usat3(i32 %x) nounwind readnone { +;CHECK: @not_usat3 +;CHECK-LLC: not_usat3: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 255 + %sel13 = select i1 %cmp1, i32 %x, i32 255 + %sel14 = select i1 %cmp, i32 0, i32 %sel13 + ret i32 %sel14 +} + +; return x < 0 ? (x > 255 ? 255 : x) : 0; +define i32 @not_usat4(i32 %x) nounwind readnone { +;CHECK: @not_usat4 +;CHECK-LLC: not_usat4: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, 0 + %cmp1 = icmp sgt i32 %x, 255 + %sel15 = select i1 %cmp1, i32 255, i32 %x + %sel16 = select i1 %cmp, i32 %sel15, i32 0 + ret i32 %sel16 +} + +; return x > 0 ? 0 : (x < 0 ? 0 : x); +; This will be optimized to ret 0 by the select-of-select instcombine +define i32 @not_usat_zero(i32 %x) nounwind readnone { +;CHECK: @not_usat_zero +;CHECK-LLC: not_usat_zero: +entry: +; CHECK-NOT: @llvm.arm. +; CHECK-LLC-NOT: sat r + %cmp = icmp sgt i32 %x, 0 + %cmp1 = icmp slt i32 %x, 0 + %sel5 = select i1 %cmp1, i32 0, i32 %x + %sel6 = select i1 %cmp, i32 0, i32 %sel5 + ret i32 %sel6 +} + +; return x < -64 ? -64 : (x > 64 ? 64 : x); +define i32 @not_ssat1(i32 %x) nounwind readnone { +;CHECK: @not_ssat1 +;CHECK-LLC: not_ssat1 +entry: +; CHECK-NOT: @llvm.arm. +; CHECK: cmp +; CHECK: select +; CHECK-LLC-NOT: sat r + %cmp = icmp slt i32 %x, -64 + %cmp1 = icmp sgt i32 %x, 64 + %sel3 = select i1 %cmp1, i32 64, i32 %x + %sel4 = select i1 %cmp, i32 -64, i32 %sel3 + ret i32 %sel4 +} + +define i64 @not_ssat_i64(i64 %x) nounwind readnone { +;CHECK: @not_ssat_i64 +;CHECK-LLC: not_ssat_i64: +entry: +;CHECK-NOT: @llvm.arm. +;CHECK: cmp +;CHECK: select +;CHECK-LLC-NOT: sat r + %cmp = icmp slt i64 %x, -64 + %cmp1 = icmp sgt i64 %x, 63 + %sel1 = select i1 %cmp1, i64 63, i64 %x + %sel2 = select i1 %cmp, i64 -64, i64 %sel1 + ret i64 %sel2 +} + +;return x < -64 ? -64 : (x > 63 ? 
63 : x); +define i8 @ssat_i8(i8 %x) nounwind readnone { +;CHECK: @ssat_i8 +;CHECK-LLC: ssat_i8: +entry: +;CHECK: sext i8 %x to i32 +;CHECK: call i32 @llvm.arm.ssat(i32 %{{.*}}, i32 6) +;CHECK: trunc i32 %{{.*}} to i8 +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: ssat r{{[0-9]}}, #7 + %cmp = icmp slt i8 %x, -64 + %cmp1 = icmp sgt i8 %x, 63 + %sel1 = select i1 %cmp1, i8 63, i8 %x + %sel2 = select i1 %cmp, i8 -64, i8 %sel1 + ret i8 %sel2 +} + +define i8 @usat_i8(i8 %x) nounwind readnone { +; CHECK: @usat_i8 +; CHECK-LLC: usat_i8: +entry: +;CHECK: zext i8 %x to i32 +;CHECK: call i32 @llvm.arm.usat(i32 %{{.*}}, i32 8) +;CHECK: trunc i32 %{{.*}} to i8 +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #8 + %cmp = icmp sgt i8 %x, 255 + %cmp1 = icmp slt i8 %x, 0 + %sel1 = select i1 %cmp1, i8 0, i8 %x + %sel2 = select i1 %cmp, i8 255, i8 %sel1 + ret i8 %sel2 +} + +define i16 @trunc_to_i16(i32 %add) { +;CHECK-LABEL: @trunc_to_i16 +entry: +;CHECK: call i32 @llvm.arm.ssat(i32 %{{.*}}, i32 15) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: ssat r{{[0-9]}}, #16 + %cmp = icmp slt i32 %add, -32768 + %cmp2 = icmp sgt i32 %add, 32767 + %0 = trunc i32 %add to i16 + %phitmp = select i1 %cmp2, i16 32767, i16 %0 + %cond7 = select i1 %cmp, i16 -32768, i16 %phitmp + ret i16 %cond7 +} + +define i16 @trunc_to_i16_u(i32 %add) { +;CHECK-LABEL: @trunc_to_i16_u +entry: +;CHECK: call i32 @llvm.arm.usat(i32 %{{.*}}, i32 16) +;CHECK-NOT: cmp +;CHECK-NOT: select +;CHECK-LLC: usat r{{[0-9]}}, #16 + %cmp = icmp slt i32 %add, 0 + %cmp2 = icmp sgt i32 %add, 65535 + %0 = trunc i32 %add to i16 + %phitmp = select i1 %cmp2, i16 65535, i16 %0 + %cond7 = select i1 %cmp, i16 0, i16 %phitmp + ret i16 %cond7 +} + +; CHECK: @foo1 +; CHECK: @llvm.arm.usat(i32 {{%[0-9]}}, i32 8) +define void @foo1(i8* %Dst, i16* %Src) #0 { +entry: + %arrayidx = getelementptr inbounds i16, i16* %Src, i32 7 + %0 = load i16, i16* %arrayidx, align 2 + %cmp = icmp slt i16 %0, 0 + %cmp4 = icmp sgt i16 %0, 255 + %1 = trunc i16 %0 to i8 + %phitmp = select i1 %cmp4, i8 -1, i8 %1 + %selv = select i1 %cmp, i8 0, i8 %phitmp + %arrayidx13 = getelementptr inbounds i8, i8* %Dst, i32 15 + store i8 %selv, i8* %arrayidx13, align 1 + ret void +} + +; CHECK: @foo2 +; CHECK: @llvm.arm.ssat(i32 {{%[0-9]}}, i32 8) +define void @foo2(i8* %Dst, i16* %Src) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %Src, i32 7 + %0 = load i16, i16* %arrayidx, align 2 + %1 = sext i16 %0 to i32 + %cmp = icmp slt i32 %1, -256 + %cmp1 = icmp sgt i32 %1, 255 + %sel1 = select i1 %cmp1, i32 255, i32 %1 + %sel3 = select i1 %cmp, i32 -256, i32 %sel1 + %2 = trunc i32 %sel3 to i8 + %arrayidx13 = getelementptr inbounds i8, i8* %Dst, i32 15 + store i8 %2, i8* %arrayidx13, align 1 + ret void +}
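
Note (not part of the patch): a minimal before/after sketch of the canonical clamp pattern this pass targets, assuming an ARMv6-or-later/Thumb triple as required by runOnFunction. The function name @clamp_to_s7 and the value names are illustrative only; the expected result mirrors the ssat1 test above.

; Before: return x < -64 ? -64 : (x > 63 ? 63 : x);
define i32 @clamp_to_s7(i32 %x) {
entry:
  %cmp1 = icmp sgt i32 %x, 63
  %sel1 = select i1 %cmp1, i32 63, i32 %x      ; inner min(x, 63)
  %cmp2 = icmp slt i32 %x, -64
  %sel2 = select i1 %cmp2, i32 -64, i32 %sel1  ; outer max(-64, ...)
  ret i32 %sel2
}

; After running the pass (e.g. via opt -simplifyselect, or through -O3 with
; -enable-select-to-intrinsics), both selects are expected to collapse into
;   %sat = call i32 @llvm.arm.ssat(i32 %x, i32 6)
;   ret i32 %sat
; because [-64, 63] is [-2^6, 2^6-1]; llc then emits an "ssat rN, #7, rM"
; instruction, as checked by the CHECK-LLC lines in the test file.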