Index: llvm/trunk/include/llvm/InitializePasses.h
===================================================================
--- llvm/trunk/include/llvm/InitializePasses.h
+++ llvm/trunk/include/llvm/InitializePasses.h
@@ -142,6 +142,7 @@
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeGCMachineCodeAnalysisPass(PassRegistry&);
 void initializeGCModuleInfoPass(PassRegistry&);
+void initializeGuardWideningLegacyPassPass(PassRegistry&);
 void initializeGVNLegacyPassPass(PassRegistry&);
 void initializeGlobalDCELegacyPassPass(PassRegistry&);
 void initializeGlobalOptLegacyPassPass(PassRegistry&);
Index: llvm/trunk/include/llvm/LinkAllPasses.h
===================================================================
--- llvm/trunk/include/llvm/LinkAllPasses.h
+++ llvm/trunk/include/llvm/LinkAllPasses.h
@@ -99,6 +99,7 @@
       (void) llvm::createGlobalDCEPass();
       (void) llvm::createGlobalOptimizerPass();
       (void) llvm::createGlobalsAAWrapperPass();
+      (void) llvm::createGuardWideningPass();
       (void) llvm::createIPConstantPropagationPass();
       (void) llvm::createIPSCCPPass();
       (void) llvm::createInductiveRangeCheckEliminationPass();
Index: llvm/trunk/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar.h
+++ llvm/trunk/include/llvm/Transforms/Scalar.h
@@ -81,6 +81,16 @@
 //
 FunctionPass *createAggressiveDCEPass();
 
+
+//===----------------------------------------------------------------------===//
+//
+// GuardWidening - An optimization over the @llvm.experimental.guard intrinsic
+// that (optimistically) combines multiple guards into one to have fewer checks
+// at runtime.
+//
+FunctionPass *createGuardWideningPass();
+
+
 //===----------------------------------------------------------------------===//
 //
 // BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
Index: llvm/trunk/include/llvm/Transforms/Scalar/GuardWidening.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/GuardWidening.h
+++ llvm/trunk/include/llvm/Transforms/Scalar/GuardWidening.h
@@ -0,0 +1,32 @@
+//===- GuardWidening.h - ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Guard widening is an optimization over the @llvm.experimental.guard intrinsic
+// that (optimistically) combines multiple guards into one to have fewer checks
+// at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
+#define LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct GuardWideningPass : public PassInfoMixin<GuardWideningPass> {
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+};
+}
+
+
+#endif // LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
Index: llvm/trunk/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp
+++ llvm/trunk/lib/Passes/PassBuilder.cpp
@@ -66,6 +66,7 @@
 #include "llvm/Transforms/Scalar/DCE.h"
 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/GuardWidening.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/LoopRotation.h"
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
Index: llvm/trunk/lib/Passes/PassRegistry.def
===================================================================
--- llvm/trunk/lib/Passes/PassRegistry.def
+++ llvm/trunk/lib/Passes/PassRegistry.def
@@ -118,6 +118,7 @@
 FUNCTION_PASS("no-op-function", NoOpFunctionPass())
 FUNCTION_PASS("loweratomic", LowerAtomicPass())
 FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("guard-widening", GuardWideningPass())
 FUNCTION_PASS("gvn", GVN())
 FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
 FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
Index: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
@@ -10,6 +10,7 @@
   EarlyCSE.cpp
   FlattenCFGPass.cpp
   Float2Int.cpp
+  GuardWidening.cpp
   GVN.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
Index: llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp
+++ llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp
@@ -0,0 +1,428 @@
+//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the guard widening pass. The semantics of the
+// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
+// more often than it did before the transform. This optimization is called
+// "widening" and can be used to hoist and common runtime checks in situations
+// like these:
+//
+//   %cmp0 = 7 u< Length
+//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
+//   call @unknown_side_effects()
+//   %cmp1 = 9 u< Length
+//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
+//   ...
+//
+// =>
+//
+//   %cmp0 = 9 u< Length
+//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
+//   call @unknown_side_effects()
+//   ...
+//
+// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
+// generic implementation of the same function, which will have the correct
+// semantics from that point onward. It is always _legal_ to deoptimize (so
+// replacing %cmp0 with false is "correct"), though it may not always be
+// profitable to do so.
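+//
+// When the two conditions do not fold into a single compare, the widened
+// guard simply tests their logical AND (this is the base case in
+// widenCondCommon below); the IR here is illustrative:
+//
+//   %wide.chk = and i1 %cmp0, %cmp1
+//   call @llvm.experimental.guard(i1 %wide.chk) [ "deopt"(...) ]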
+//
+// NB! This pass is a work in progress. It hasn't been tuned to be "production
+// ready" yet. It is known to have quadratic running time and will not scale
+// to large numbers of guards.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/GuardWidening.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "guard-widening"
+
+namespace {
+
+class GuardWideningImpl {
+  DominatorTree &DT;
+  PostDominatorTree &PDT;
+  LoopInfo &LI;
+
+  /// The set of guards whose conditions have been widened into dominating
+  /// guards.
+  SmallVector<IntrinsicInst *, 16> EliminatedGuards;
+
+  /// The set of guards which have been widened to include conditions to other
+  /// guards.
+  DenseSet<IntrinsicInst *> WidenedGuards;
+
+  /// Try to eliminate guard \p Guard by widening it into an earlier dominating
+  /// guard. \p DFSI is the DFS iterator on the dominator tree that is
+  /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
+  /// maps BasicBlocks to the set of guards seen in that block.
+  bool eliminateGuardViaWidening(
+      IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI,
+      const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+          GuardsPerBlock);
+
+  /// Used to keep track of which widening potential is more effective.
+  enum WideningScore {
+    /// Don't widen.
+    WS_IllegalOrNegative,
+
+    /// Widening is performance neutral as far as the cycles spent in check
+    /// conditions goes (but can still help, e.g., code layout, having less
+    /// deopt state).
+    WS_Neutral,
+
+    /// Widening is profitable.
+    WS_Positive,
+
+    /// Widening is very profitable. Not significantly different from \c
+    /// WS_Positive, except by the order.
+    WS_VeryPositive
+  };
+
+  static StringRef scoreTypeToString(WideningScore WS);
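+
+  // An illustrative reading of the scores: hoisting a guard out of a loop
+  // into a dominating guard before the loop is WS_Positive even if the
+  // combined condition needs an extra 'and', since the check now runs once
+  // instead of once per iteration; if the two conditions also fold into a
+  // single compare, the widening is WS_VeryPositive.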
+
+  /// Compute the score for widening the condition in \p DominatedGuard
+  /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained
+  /// in \p DominatingGuardLoop).
+  WideningScore computeWideningScore(IntrinsicInst *DominatedGuard,
+                                     Loop *DominatedGuardLoop,
+                                     IntrinsicInst *DominatingGuard,
+                                     Loop *DominatingGuardLoop);
+
+  /// Helper to check if \p V can be hoisted to \p InsertPos.
+  bool isAvailableAt(Value *V, Instruction *InsertPos) {
+    SmallPtrSet<Instruction *, 8> Visited;
+    return isAvailableAt(V, InsertPos, Visited);
+  }
+
+  bool isAvailableAt(Value *V, Instruction *InsertPos,
+                     SmallPtrSetImpl<Instruction *> &Visited);
+
+  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
+  /// isAvailableAt returned true.
+  void makeAvailableAt(Value *V, Instruction *InsertPos);
+
+  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
+  /// to generate an expression computing the logical AND of \p Cond0 and \p
+  /// Cond1. Return true if the expression computing the AND is only as
+  /// expensive as computing one of the two. If \p InsertPt is non-null then
+  /// actually generate the resulting expression, make it available at \p
+  /// InsertPt and return it in \p Result (else no change to the IR is made).
+  bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
+                       Value *&Result);
+
+  /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
+  /// computing only one of the two expressions?
+  bool isWideningCondProfitable(Value *Cond0, Value *Cond1) {
+    Value *ResultUnused;
+    return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused);
+  }
+
+  /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
+  /// whatever it is already checking).
+  void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) {
+    Value *Result;
+    widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result);
+    ToWiden->setArgOperand(0, Result);
+  }
+
+public:
+  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT,
+                             LoopInfo &LI)
+      : DT(DT), PDT(PDT), LI(LI) {}
+
+  /// The entry point for this pass.
+  bool run();
+};
+
+struct GuardWideningLegacyPass : public FunctionPass {
+  static char ID;
+  GuardWideningPass Impl;
+
+  GuardWideningLegacyPass() : FunctionPass(ID) {
+    initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    return GuardWideningImpl(
+               getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+               getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(),
+               getAnalysis<LoopInfoWrapperPass>().getLoopInfo()).run();
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+  }
+};
+
+}
+
+bool GuardWideningImpl::run() {
+  using namespace llvm::PatternMatch;
+
+  DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
+  bool Changed = false;
+
+  for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
+       DFI != DFE; ++DFI) {
+    auto *BB = (*DFI)->getBlock();
+    auto &CurrentList = GuardsInBlock[BB];
+
+    for (auto &I : *BB)
+      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
+        CurrentList.push_back(cast<IntrinsicInst>(&I));
+
+    for (auto *II : CurrentList)
+      Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+  }
+
+  for (auto *II : EliminatedGuards)
+    if (!WidenedGuards.count(II))
+      II->eraseFromParent();
+
+  return Changed;
+}
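+
+// Note: because run() walks the dominator tree depth-first, every block on
+// the DFS path leading to the current guard has already had its guards
+// recorded in GuardsInBlock; the "Must have been populated by now!" assertion
+// in eliminateGuardViaWidening relies on exactly this visitation order.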
+
+bool GuardWideningImpl::eliminateGuardViaWidening(
+    IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+    const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+        GuardsInBlock) {
+  IntrinsicInst *BestSoFar = nullptr;
+  auto BestScoreSoFar = WS_IllegalOrNegative;
+  auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
+
+  // In the set of dominating guards, find the one we can merge GuardInst with
+  // for the most profit.
+  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
+    auto *CurBB = DFSI.getPath(i)->getBlock();
+    auto *CurLoop = LI.getLoopFor(CurBB);
+    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
+    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
+
+    auto I = GuardsInCurBB.begin();
+    auto E = GuardsInCurBB.end();
+
+#ifndef NDEBUG
+    {
+      unsigned Index = 0;
+      for (auto &I : *CurBB) {
+        if (Index == GuardsInCurBB.size())
+          break;
+        if (GuardsInCurBB[Index] == &I)
+          Index++;
+      }
+      assert(Index == GuardsInCurBB.size() &&
+             "Guards expected to be in order!");
+    }
+#endif
+
+    assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
+
+    if (i == (e - 1)) {
+      // Corner case: make sure we're only looking at guards strictly
+      // dominating GuardInst when visiting GuardInst->getParent().
+      auto NewEnd = std::find(I, E, GuardInst);
+      assert(NewEnd != E && "GuardInst not in its own block?");
+      E = NewEnd;
+    }
+
+    for (auto *Candidate : make_range(I, E)) {
+      auto Score =
+          computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
+      DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0)
+                   << " and " << *Candidate->getArgOperand(0) << " is "
+                   << scoreTypeToString(Score) << "\n");
+      if (Score > BestScoreSoFar) {
+        BestScoreSoFar = Score;
+        BestSoFar = Candidate;
+      }
+    }
+  }
+
+  if (BestScoreSoFar == WS_IllegalOrNegative) {
+    DEBUG(dbgs() << "Did not eliminate guard " << *GuardInst << "\n");
+    return false;
+  }
+
+  assert(BestSoFar != GuardInst && "Should have never visited same guard!");
+  assert(DT.dominates(BestSoFar, GuardInst) && "Should be!");
+
+  DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
+               << " with score " << scoreTypeToString(BestScoreSoFar) << "\n");
+  widenGuard(BestSoFar, GuardInst->getArgOperand(0));
+  GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext()));
+  EliminatedGuards.push_back(GuardInst);
+  WidenedGuards.insert(BestSoFar);
+  return true;
+}
+
+GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
+    IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop,
+    IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) {
+  bool HoistingOutOfLoop = false;
+
+  if (DominatingGuardLoop != DominatedGuardLoop) {
+    if (DominatingGuardLoop &&
+        !DominatingGuardLoop->contains(DominatedGuardLoop))
+      return WS_IllegalOrNegative;
+
+    HoistingOutOfLoop = true;
+  }
+
+  if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard))
+    return WS_IllegalOrNegative;
+
+  bool HoistingOutOfIf =
+      !PDT.dominates(DominatedGuard->getParent(), DominatingGuard->getParent());
+
+  if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
+                               DominatingGuard->getArgOperand(0)))
+    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
+
+  if (HoistingOutOfLoop)
+    return WS_Positive;
+
+  return HoistingOutOfIf ? WS_IllegalOrNegative : WS_Neutral;
+}
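+
+// Note on the HoistingOutOfIf case above: when the dominated guard does not
+// postdominate the dominating guard, its condition only runs on some paths,
+// so widening would evaluate it on paths that never needed the check. That is
+// only worthwhile when the combined condition is no more expensive than the
+// original one (compare @f_3 and @f_5 in the tests).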
+
+bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
+                                      SmallPtrSetImpl<Instruction *> &Visited) {
+  auto *Inst = dyn_cast<Instruction>(V);
+  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
+    return true;
+
+  if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) ||
+      Inst->mayReadFromMemory())
+    return false;
+
+  Visited.insert(Inst);
+
+  // We only want to go _up_ the dominance chain when recursing.
+  assert(!isa<PHINode>(Loc) &&
+         "PHIs should return false for isSafeToSpeculativelyExecute");
+  assert(DT.isReachableFromEntry(Inst->getParent()) &&
+         "We did a DFS from the block entry!");
+  return all_of(Inst->operands(),
+                [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
+}
+
+void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
+  auto *Inst = dyn_cast<Instruction>(V);
+  if (!Inst || DT.dominates(Inst, Loc))
+    return;
+
+  assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) &&
+         !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");
+
+  for (Value *Op : Inst->operands())
+    makeAvailableAt(Op, Loc);
+
+  Inst->moveBefore(Loc);
+}
+
+bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
+                                        Instruction *InsertPt, Value *&Result) {
+  using namespace llvm::PatternMatch;
+
+  {
+    // L >u C0 && L >u C1  ->  L >u max(C0, C1)
+    ConstantInt *RHS0, *RHS1;
+    Value *LHS;
+    ICmpInst::Predicate Pred0, Pred1;
+    if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
+        match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
+
+      // TODO: This logic should be generalized and refactored into a new
+      // Constant::getEquivalentICmp helper.
+      if (Pred0 == ICmpInst::ICMP_NE && RHS0->isZero())
+        Pred0 = ICmpInst::ICMP_UGT;
+      if (Pred1 == ICmpInst::ICMP_NE && RHS1->isZero())
+        Pred1 = ICmpInst::ICMP_UGT;
+
+      if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_UGT) {
+        if (InsertPt) {
+          ConstantInt *NewRHS =
+              RHS0->getValue().ugt(RHS1->getValue()) ? RHS0 : RHS1;
+          Result = new ICmpInst(InsertPt, ICmpInst::ICMP_UGT, LHS, NewRHS,
+                                "wide.chk");
+        }
+
+        return true;
+      }
+    }
+  }
+
+  // Base case -- just logical-and the two conditions together.
+
+  if (InsertPt) {
+    makeAvailableAt(Cond0, InsertPt);
+    makeAvailableAt(Cond1, InsertPt);
+
+    Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
+  }
+
+  // We were not able to compute Cond0 AND Cond1 for the price of one.
+  return false;
+}
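+
+// Worked example for the icmp case in widenCondCommon (values illustrative):
+// with Cond0 = (%x u> 7) and Cond1 = (%x u> 10) sharing the LHS %x, the
+// widened check is the single compare %x u> 10 -- the larger of the two
+// constants, max(7, 10) -- rather than an 'and' of both compares (see @f_5
+// in the tests).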
+
+PreservedAnalyses GuardWideningPass::run(Function &F,
+                                         AnalysisManager<Function> &AM) {
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+  bool Changed = GuardWideningImpl(DT, PDT, LI).run();
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
+  switch (WS) {
+  case WS_IllegalOrNegative:
+    return "IllegalOrNegative";
+  case WS_Neutral:
+    return "Neutral";
+  case WS_Positive:
+    return "Positive";
+  case WS_VeryPositive:
+    return "VeryPositive";
+  }
+
+  llvm_unreachable("Fully covered switch above!");
+}
+
+char GuardWideningLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
+                    false, false)
+
+FunctionPass *llvm::createGuardWideningPass() {
+  return new GuardWideningLegacyPass();
+}
Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
+++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
@@ -41,6 +41,7 @@
   initializeDeadInstEliminationPass(Registry);
   initializeScalarizerPass(Registry);
   initializeDSELegacyPassPass(Registry);
+  initializeGuardWideningLegacyPassPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeEarlyCSELegacyPassPass(Registry);
   initializeFlattenCFGPassPass(Registry);
Index: llvm/trunk/test/Transforms/GuardWidening/basic.ll
===================================================================
--- llvm/trunk/test/Transforms/GuardWidening/basic.ll
+++ llvm/trunk/test/Transforms/GuardWidening/basic.ll
@@ -0,0 +1,337 @@
+; RUN: opt -S -guard-widening < %s | FileCheck %s
+; RUN: opt -S -passes=guard-widening < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+; Basic test case: we widen the first check to check both
+; conditions.
+define void @f_0(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_0(
+entry:
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: ret void
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+}
+
+; Same as @f_0, but using a more general notion of postdominance.
+define void @f_1(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_1(
+entry:
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %left, label %right
+
+left:
+  br label %merge
+
+right:
+  br label %merge
+
+merge:
+; CHECK: merge:
+; CHECK-NOT: call void (i1, ...) @llvm.experimental.guard(
+; CHECK: ret void
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+}
+
+; Like @f_1, but we have some code we need to hoist before we can
+; widen a dominating check.
+define void @f_2(i32 %a, i32 %b) {
+; CHECK-LABEL: @f_2(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 10
+; CHECK: %cond_1 = icmp ult i32 %b, 10
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+  %cond_0 = icmp ult i32 %a, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %left, label %right
+
+left:
+  br label %merge
+
+right:
+  br label %merge
+
+merge:
+  %cond_1 = icmp ult i32 %b, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+}
+
+; Negative test: don't hoist stuff out of control flow
+; indiscriminately, since that can make us do more work than needed.
+define void @f_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @f_3(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+  %cond_0 = icmp ult i32 %a, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %left, label %right
+
+left:
+; CHECK: left:
+; CHECK: %cond_1 = icmp ult i32 %b, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+; CHECK: ret void
+
+  %cond_1 = icmp ult i32 %b, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+
+right:
+  ret void
+}
+
+; But hoisting out of control flow is fine if it makes a loop-computed
+; condition loop invariant. This behavior may require some tuning in
+; the future.
+define void @f_4(i32 %a, i32 %b) {
+; CHECK-LABEL: @f_4(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 10
+; CHECK: %cond_1 = icmp ult i32 %b, 10
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %loop, label %leave
+
+  %cond_0 = icmp ult i32 %a, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %loop, label %leave
+
+loop:
+  %cond_1 = icmp ult i32 %b, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  br i1 undef, label %loop, label %leave
+
+leave:
+  ret void
+}
+
+; Hoisting out of control flow is also fine if we can widen the
+; dominating check without doing any extra work.
+define void @f_5(i32 %a) {
+; CHECK-LABEL: @f_5(
+entry:
+; CHECK: %wide.chk = icmp ugt i32 %a, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+  %cond_0 = icmp ugt i32 %a, 7
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %left, label %right
+
+left:
+  %cond_1 = icmp ugt i32 %a, 10
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+
+right:
+  ret void
+}
+
+; Negative test: the load from %a can be safely speculated to before
+; the first guard, but there is no guarantee that it will produce the
+; same value.
+define void @f_6(i1* dereferenceable(32) %a, i1* %b, i1 %unknown) {
+; CHECK-LABEL: @f_6(
+; CHECK: call void (i1, ...) @llvm.experimental.guard(
+; CHECK: call void (i1, ...) @llvm.experimental.guard(
+; CHECK: ret void
+entry:
+  %cond_0 = load i1, i1* %a
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  store i1 %unknown, i1* %b
+  %cond_1 = load i1, i1* %a
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+}
+
+; All else equal, we try to widen the earliest guard we can. This
+; heuristic can use some tuning.
+define void @f_7(i32 %a, i1* %cond_buf) {
+; CHECK-LABEL: @f_7(
+entry:
+; CHECK: %cond_1 = load volatile i1, i1* %cond_buf
+; CHECK: %cond_3 = icmp ult i32 %a, 7
+; CHECK: %wide.chk = and i1 %cond_1, %cond_3
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: %cond_2 = load volatile i1, i1* %cond_buf
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_2) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+  %cond_1 = load volatile i1, i1* %cond_buf
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  %cond_2 = load volatile i1, i1* %cond_buf
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_2) [ "deopt"() ]
+  br i1 undef, label %left, label %right
+
+left:
+  %cond_3 = icmp ult i32 %a, 7
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_3) [ "deopt"() ]
+  br label %left
+
+right:
+  ret void
+}
+
+; In this case the earliest dominating guard is in a loop, and we
+; don't want to put extra work in there. This heuristic can use some
+; tuning.
+define void @f_8(i32 %a, i1 %cond_1, i1 %cond_2) {
+; CHECK-LABEL: @f_8(
+entry:
+  br label %loop
+
+loop:
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  br i1 undef, label %loop, label %leave
+
+leave:
+; CHECK: leave:
+; CHECK: %cond_3 = icmp ult i32 %a, 7
+; CHECK: %wide.chk = and i1 %cond_2, %cond_3
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %loop2, label %leave2
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_2) [ "deopt"() ]
+  br i1 undef, label %loop2, label %leave2
+
+loop2:
+  %cond_3 = icmp ult i32 %a, 7
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_3) [ "deopt"() ]
+  br label %loop2
+
+leave2:
+  ret void
+}
+
+; In cases like these, where there are no "obviously profitable"
+; widening sites, we refuse to do anything.
+define void @f_9(i32 %a, i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_9(
+entry:
+  br label %first_loop
+
+first_loop:
+; CHECK: first_loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %first_loop, label %second_loop
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %first_loop, label %second_loop
+
+second_loop:
+; CHECK: second_loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+; CHECK: br label %second_loop
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  br label %second_loop
+}
+
+; Same situation as in @f_9: no "obviously profitable" widening sites,
+; so we refuse to do anything.
+define void @f_10(i32 %a, i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_10(
+entry:
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %loop, label %no_loop
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %loop, label %no_loop
+
+no_loop:
+; CHECK: no_loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+; CHECK: ret void
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  ret void
+}
+
+; With guards in loops, we're okay hoisting the guard out into the
+; containing loop.
+define void @f_11(i32 %a, i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_11(
+entry:
+  br label %inner
+
+inner:
+; CHECK: inner:
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %inner, label %outer
+
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  br i1 undef, label %inner, label %outer
+
+outer:
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  br label %inner
+}
+
+; Checks that we are adequately guarded against exponential-time
+; behavior when hoisting code.
+define void @f_12(i32 %a0) {
+; CHECK-LABEL: @f_12
+
+; Eliding the earlier 29 multiplications for brevity.
+; CHECK: %a30 = mul i32 %a29, %a29
+; CHECK-NEXT: %cond = trunc i32 %a30 to i1
+; CHECK-NEXT: %wide.chk = and i1 true, %cond
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK-NEXT: ret void
+
+entry:
+  call void(i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
+  %a1 = mul i32 %a0, %a0
+  %a2 = mul i32 %a1, %a1
+  %a3 = mul i32 %a2, %a2
+  %a4 = mul i32 %a3, %a3
+  %a5 = mul i32 %a4, %a4
+  %a6 = mul i32 %a5, %a5
+  %a7 = mul i32 %a6, %a6
+  %a8 = mul i32 %a7, %a7
+  %a9 = mul i32 %a8, %a8
+  %a10 = mul i32 %a9, %a9
+  %a11 = mul i32 %a10, %a10
+  %a12 = mul i32 %a11, %a11
+  %a13 = mul i32 %a12, %a12
+  %a14 = mul i32 %a13, %a13
+  %a15 = mul i32 %a14, %a14
+  %a16 = mul i32 %a15, %a15
+  %a17 = mul i32 %a16, %a16
+  %a18 = mul i32 %a17, %a17
+  %a19 = mul i32 %a18, %a18
+  %a20 = mul i32 %a19, %a19
+  %a21 = mul i32 %a20, %a20
+  %a22 = mul i32 %a21, %a21
+  %a23 = mul i32 %a22, %a22
+  %a24 = mul i32 %a23, %a23
+  %a25 = mul i32 %a24, %a24
+  %a26 = mul i32 %a25, %a25
+  %a27 = mul i32 %a26, %a26
+  %a28 = mul i32 %a27, %a27
+  %a29 = mul i32 %a28, %a28
+  %a30 = mul i32 %a29, %a29
+  %cond = trunc i32 %a30 to i1
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  ret void
+}
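
For reference, a minimal sketch of driving the new pass from C++ rather than
through opt (written against the present-day new-pass-manager API, which may
differ in detail at this revision; widenGuardsIn is a hypothetical helper,
not part of this change):

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Scalar/GuardWidening.h"

  using namespace llvm;

  // Run guard widening over a single function, after registering the
  // function analyses (DominatorTree, PostDominatorTree, LoopInfo) that
  // GuardWideningPass::run requests from the analysis manager.
  void widenGuardsIn(Function &F) {
    PassBuilder PB;
    FunctionAnalysisManager FAM;
    PB.registerFunctionAnalyses(FAM);

    FunctionPassManager FPM;
    FPM.addPass(GuardWideningPass());
    FPM.run(F, FAM);
  }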