diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -30,6 +31,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/ScaledNumber.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
 #include <algorithm>
@@ -52,6 +54,8 @@
           "Number of select groups not converted due to unpredictability");
 STATISTIC(NumSelectColdBB,
           "Number of select groups not converted due to cold basic block");
+STATISTIC(NumSelectConvertedLoop,
+          "Number of select groups converted due to loop-level analysis");
 STATISTIC(NumSelectsConverted, "Number of selects converted");
 
 static cl::opt<unsigned> ColdOperandThreshold(
@@ -65,6 +69,26 @@
              "slice of a cold operand to be considered inexpensive."),
     cl::init(1), cl::Hidden);
 
+static cl::opt<unsigned>
+    LoopGradientGain("select-opti-loop-gradient-gain",
+                     cl::desc("Gradient gain threshold (%)."), cl::init(25),
+                     cl::Hidden);
+
+static cl::opt<unsigned>
+    GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
+                       cl::desc("Minimum gain per loop (in cycles) threshold."),
+                       cl::init(4), cl::Hidden);
+
+static cl::opt<unsigned> GainRelativeThreshold(
+    "select-opti-loop-relative-gain-threshold",
+    cl::desc(
+        "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
+    cl::init(8), cl::Hidden);
+
+static cl::opt<unsigned> MispredictDefaultRate(
+    "mispredict-default-rate", cl::Hidden, cl::init(25),
+    cl::desc("Default mispredict rate (initialized to 25%)."));
+
 namespace {
 
 class SelectOptimize : public FunctionPass {
@@ -78,6 +102,7 @@
   std::unique_ptr<BranchProbabilityInfo> BPI;
   ProfileSummaryInfo *PSI;
   OptimizationRemarkEmitter *ORE;
+  TargetSchedModel TSchedModel;
 
 public:
   static char ID;
@@ -103,6 +128,18 @@
   using SelectGroup = SmallVector<SelectInst *, 2>;
   using SelectGroups = SmallVector<SelectGroup, 2>;
 
+  using Scaled64 = ScaledNumber<uint64_t>;
+
+  struct CostInfo {
+    /// Predicated cost (with selects as conditional moves).
+    Scaled64 PredCost;
+    /// Non-predicated cost (with selects converted to branches).
+    Scaled64 NonPredCost;
+  };
+
   // Converts select instructions of a function to conditional jumps when deemed
   // profitable. Returns true if at least one select was converted.
   bool optimizeSelects(Function &F);
@@ -122,9 +159,12 @@
   void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
 
   // Determines for which select groups it is profitable converting to branches
-  // (base heuristics).
+  // (base and inner-most-loop heuristics).
   void findProfitableSIGroupsBase(SelectGroups &SIGroups,
                                   SelectGroups &ProfSIGroups);
+  void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
+                                        SelectGroups &ProfSIGroups);
+
   // Determines if a select group should be converted to a branch (base
   // heuristics).
   bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
@@ -143,6 +183,29 @@
   // Returns true if the condition of the select is highly predictable.
   bool isSelectHighlyPredictable(const SelectInst *SI);
 
+  // Loop-level checks to determine if a non-predicated version (with branches)
+  // of the given loop is more profitable than its predicated version.
+  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopCost[2]);
+
+  // Computes instruction and loop-critical-path costs for both the predicated
+  // and non-predicated versions of the given loop.
+  bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
+                        DenseMap<const Instruction *, CostInfo> &InstCostMap,
+                        CostInfo *LoopCost);
+
+  // Returns a set of all the select instructions in the given select groups.
+  SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
+
+  // Returns the latency cost of a given instruction.
+  Optional<uint64_t> computeInstCost(const Instruction *I);
+
+  // Returns the misprediction cost of a given select when converted to branch.
+  Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
+
+  // Returns the cost of a branch when the prediction is correct.
+  Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+                                const SelectInst *SI);
+
   // Returns true if the target architecture supports lowering a given select.
   bool isSelectKindSupported(SelectInst *SI);
 };
@@ -183,6 +246,7 @@
   BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  TSchedModel.init(TSI);
 
   // When optimizing for size, selects are preferable over branches.
   if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
@@ -224,7 +288,24 @@
 }
 
 void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
-                                               SelectGroups &ProfSIGroups) {}
+                                               SelectGroups &ProfSIGroups) {
+  SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
+  // Need to check size on each iteration as we accumulate child loops.
+  for (unsigned long i = 0; i < Loops.size(); ++i)
+    for (Loop *ChildL : Loops[i]->getSubLoops())
+      Loops.push_back(ChildL);
+
+  for (Loop *L : Loops) {
+    if (!L->isInnermost())
+      continue;
+
+    SelectGroups SIGroups;
+    for (BasicBlock *BB : L->getBlocks())
+      collectSelectGroups(*BB, SIGroups);
+
+    findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
+  }
+}
 
 /// If \p isTrue is true, return the true value of \p SI, otherwise return
 /// false value of \p SI. If the true/false value of \p SI is defined by any
@@ -376,6 +457,54 @@
   }
 }
 
+void SelectOptimize::findProfitableSIGroupsInnerLoops(
+    const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
+  NumSelectOptAnalyzed += SIGroups.size();
+  // For each select group in an inner-most loop, a branch is preferable to a
+  // select/conditional-move if:
+  // i) conversion to branches satisfies loop-level heuristics, including
+  //    reducing the loop's critical path by some threshold; and
+  // ii) the select group is cheaper as a branch than in its predicated form.
+  //     The cost is in terms of latency, and the cost of a select group is the
+  //     cost of its most expensive select instruction (assuming infinite
+  //     resources and thus fully leveraging available ILP).
+
+  DenseMap<const Instruction *, CostInfo> InstCostMap;
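+  // LoopCost[0] and LoopCost[1] hold the cost of the loop's critical path
+  // after one and after two iterations, respectively.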
+  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
+                          {Scaled64::getZero(), Scaled64::getZero()}};
+  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
+      !checkLoopHeuristics(L, LoopCost)) {
+    return;
+  }
+
+  for (SelectGroup &ASI : SIGroups) {
+    // Assuming infinite resources, the cost of a group of instructions is the
+    // cost of the most expensive instruction of the group.
+    Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
+    for (SelectInst *SI : ASI) {
+      SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
+      BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
+    }
+    if (BranchCost < SelectCost) {
+      OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
+      OR << "Profitable to convert to branch (loop analysis). BranchCost="
+         << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
+         << ". ";
+      ORE->emit(OR);
+      ++NumSelectConvertedLoop;
+      ProfSIGroups.push_back(ASI);
+    } else {
+      OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+      ORmiss << "Select is more profitable (loop analysis). BranchCost="
+             << BranchCost.toString()
+             << ", SelectCost=" << SelectCost.toString() << ". ";
+      ORE->emit(ORmiss);
+    }
+  }
+}
+
 bool SelectOptimize::isConvertToBranchProfitableBase(
     const SmallVector<SelectInst *, 2> &ASI) {
   SelectInst *SI = ASI.front();
@@ -529,6 +658,225 @@
   return false;
 }
 
+bool SelectOptimize::checkLoopHeuristics(const Loop *L,
+                                         const CostInfo LoopCost[2]) {
+  // Loop-level checks to determine if a non-predicated version (with branches)
+  // of the loop is more profitable than its predicated version.
+  //
+  OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
+                                   L->getHeader()->getFirstNonPHI());
+
+  if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
+      LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
+    ORmissL << "Did not convert any select in the loop due to no reduction of "
+               "loop's critical path. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+
+  Scaled64 Diff[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
+                      LoopCost[1].PredCost - LoopCost[1].NonPredCost};
+
+  // To be profitable, conversion to branches needs to reduce the loop's
+  // critical path by at least some threshold: by default, an absolute gain of
+  // GainCycleThreshold (4) cycles and a relative gain of 12.5%.
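+  // For example, with the default thresholds and a predicated critical path
+  // of 40 cycles, a gain of at least max(4, 40/8) = 5 cycles is required.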
+  if (Diff[1] < Scaled64::get(GainCycleThreshold) ||
+      Diff[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
+    ORmissL
+        << "Did not convert any select in the loop due to small reduction of "
+           "the loop's critical path. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+
+  // If the loop's critical path involves loop-carried dependences, the
+  // gradient of the gain needs to be at least LoopGradientGain% (defaults to
+  // 25%). This check ensures that the latency of the loop's critical path
+  // keeps decreasing at a sufficient rate beyond the two analyzed loop
+  // iterations.
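+  // For example (hypothetical costs): PredCost = {20, 40} and
+  // NonPredCost = {15, 30} give Diff = {5, 10} and a gradient of
+  // 100 * (10 - 5) / (40 - 20) = 25%, just meeting the default threshold.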
+  if (Diff[1] > Diff[0] && (Diff[1] - Diff[0]) * Scaled64::get(100) <
+                               (LoopCost[1].PredCost - LoopCost[0].PredCost) *
+                                   Scaled64::get(LoopGradientGain)) {
+    ORmissL << "Did not convert any select in the loop due to small gradient "
+               "gain. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+  // If the gain decreases, it is not profitable to convert.
+  if (Diff[1] < Diff[0]) {
+    ORmissL
+        << "Did not convert any select in the loop due to negative gradient "
+           "gain. ";
+    ORE->emit(ORmissL);
+    return false;
+  }
+
+  // Non-predicated version of the loop is more profitable than its
+  // predicated version.
+  return true;
+}
+
+// Computes instruction and loop-critical-path costs for both the predicated
+// and non-predicated versions of the given loop.
+// Returns false if unable to compute these costs due to invalid cost of loop
+// instruction(s).
+bool SelectOptimize::computeLoopCosts(
+    const Loop *L, const SelectGroups &SIGroups,
+    DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+  const auto &SIset = getSIset(SIGroups);
+  // Compute instruction and loop-critical-path costs across two iterations
+  // for both the predicated and non-predicated versions.
+  const unsigned Iterations = 2;
+  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
+    // Cost of the loop's critical path.
+    CostInfo &MaxCost = LoopCost[Iter];
+    for (BasicBlock *BB : L->getBlocks()) {
+      for (const Instruction &I : *BB) {
+        if (I.isDebugOrPseudoInst())
+          continue;
+        // Compute the predicated and non-predicated cost of the instruction.
+        Scaled64 IPredCost = Scaled64::getZero(),
+                 INonPredCost = Scaled64::getZero();
+
+        // Assume infinite resources that allow us to fully exploit the
+        // available instruction-level parallelism.
+        // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
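+        // For example, a 1-cycle add whose operands become available at
+        // cycles 2 and 3 is charged a cost of max(2, 3) + 1 = 4 cycles.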
+        for (const Use &U : I.operands()) {
+          auto UI = dyn_cast<Instruction>(U.get());
+          if (!UI)
+            continue;
+          if (InstCostMap.count(UI)) {
+            IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
+            INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
+          }
+        }
+        auto ILatency = computeInstCost(&I);
+        if (!ILatency.hasValue()) {
+          OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
+          ORmissL << "Invalid instruction cost preventing analysis and "
+                     "optimization of the inner-most loop containing this "
+                     "instruction. ";
+          ORE->emit(ORmissL);
+          return false;
+        }
+        IPredCost += Scaled64::get(ILatency.getValue());
+        INonPredCost += Scaled64::get(ILatency.getValue());
+
+        // For a select that can be converted to branch,
+        // compute its cost as a branch (non-predicated cost).
+        //
+        // BranchCost = PredictedPathCost + MispredictCost
+        // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
+        // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
+        if (SIset.contains(&I)) {
+          const auto *SI = cast<SelectInst>(&I);
+
+          Scaled64 TrueOpCost = Scaled64::getZero(),
+                   FalseOpCost = Scaled64::getZero();
+          if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
+            if (InstCostMap.count(TI))
+              TrueOpCost = InstCostMap[TI].NonPredCost;
+          if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
+            if (InstCostMap.count(FI))
+              FalseOpCost = InstCostMap[FI].NonPredCost;
+          Scaled64 PredictedPathCost =
+              getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
+
+          Scaled64 CondCost = Scaled64::getZero();
+          if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
+            if (InstCostMap.count(CI))
+              CondCost = InstCostMap[CI].NonPredCost;
+          Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
+
+          INonPredCost = PredictedPathCost + MispredictCost;
+        }
+
+        InstCostMap[&I] = {IPredCost, INonPredCost};
+        MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
+        MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
+      }
+    }
+  }
+  return true;
+}
+
+SmallPtrSet<const Instruction *, 2>
+SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+  SmallPtrSet<const Instruction *, 2> SIset;
+  for (const SelectGroup &ASI : SIGroups)
+    for (const SelectInst *SI : ASI)
+      SIset.insert(SI);
+  return SIset;
+}
+
+Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+  InstructionCost ICost =
+      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+  if (auto OC = ICost.getValue())
+    return Optional<uint64_t>(OC.getValue());
+  return Optional<uint64_t>(None);
+}
+
+ScaledNumber<uint64_t>
+SelectOptimize::getMispredictionCost(const SelectInst *SI,
+                                     const Scaled64 CondCost) {
+  uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
+
+  // Account for the default misprediction rate when using a branch
+  // (conservatively set to 25% by default).
+  uint64_t MispredictRate = MispredictDefaultRate;
+  // If the select condition is obviously predictable, then the misprediction
+  // rate is zero.
+  if (isSelectHighlyPredictable(SI))
+    MispredictRate = 0;
+
+  // CondCost is included to account for cases where the computation of the
+  // condition is part of a long dependence chain (potentially loop-carried)
+  // that would delay detection of a misprediction and increase its cost.
+  Scaled64 MispredictCost =
+      std::max(Scaled64::get(MispredictPenalty), CondCost) *
+      Scaled64::get(MispredictRate);
+  MispredictCost /= Scaled64::get(100);
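+  // For example, assuming a 14-cycle mispredict penalty, a short condition
+  // dependence chain, and the default 25% rate:
+  // MispredictCost = 14 * 25 / 100 = 3.5 cycles.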
+
+  return MispredictCost;
+}
+
+// Returns the cost of a branch when the prediction is correct.
+// TrueCost * TrueProbability + FalseCost * FalseProbability.
+ScaledNumber<uint64_t>
+SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+                                     const SelectInst *SI) {
+  Scaled64 PredPathCost;
+  uint64_t TrueWeight, FalseWeight;
+  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+    uint64_t SumWeight = TrueWeight + FalseWeight;
+    if (SumWeight != 0) {
+      PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
+                     FalseCost * Scaled64::get(FalseWeight);
+      PredPathCost /= Scaled64::get(SumWeight);
+      return PredPathCost;
+    }
+  }
+  // Without branch weight metadata, we assume 75% for one path and 25% for
+  // the other, and conservatively pick the larger of the two costs.
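+  // For example, TrueCost = 2 and FalseCost = 1 yield
+  // max(2 * 3 + 1, 1 * 3 + 2) / 4 = 7 / 4 = 1.75 cycles.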
+  PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
+                          FalseCost * Scaled64::get(3) + TrueCost);
+  PredPathCost /= Scaled64::get(4);
+  return PredPathCost;
+}
+
 bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
   if (VectorCond)
diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll
--- a/llvm/test/CodeGen/X86/select-optimize.ll
+++ b/llvm/test/CodeGen/X86/select-optimize.ll
@@ -211,6 +211,78 @@
   ret i32 %add
 }
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Test loop heuristic: loop-level critical-path analysis
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Use of cmov in this test would put a load and an fsub on the critical path.
+;; Loop-level analysis should decide to form a branch.
+;;
+;;double cmov_on_critical_path(int n, double x, double *a) {
+;;  for (int i = 0; i < n; i++) {
+;;    double r = a[i];
+;;    if (x > r)
+;;      // 50% of iterations
+;;      x -= r;
+;;  }
+;;  return x;
+;;}
+define double @cmov_on_critical_path(i32 %n, double %x, double* nocapture %a) {
+; CHECK-LABEL: @cmov_on_critical_path(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret double [[X:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[R:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]]
+; CHECK-NEXT:    [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]]
+; CHECK-NEXT:    br i1 [[X2_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]], !prof [[PROF20:![0-9]+]]
+; CHECK:       select.false:
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[X2]] = phi double [ [[SUB]], [[FOR_BODY]] ], [ [[X1]], [[SELECT_FALSE]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       for.exit:
+; CHECK-NEXT:    ret double [[X2]]
+;
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %entry
+  ret double %x
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  %r = load double, double* %arrayidx, align 8
+  %sub = fsub double %x1, %r
+  %cmp2 = fcmp ogt double %x1, %r
+  %x2 = select i1 %cmp2, double %sub, double %x1, !prof !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:                                         ; preds = %for.body
+  ret double %x2
+}
+
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata)