diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -44,6 +44,7 @@
 class SelectInst;
 class SwitchInst;
 class TargetLibraryInfo;
+class TargetTransformInfo;
 class Value;
 
 /// A private "module" namespace for types and utilities used by
@@ -78,6 +79,7 @@
 /// revectored to the false side of the second if.
 class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
   TargetLibraryInfo *TLI;
+  TargetTransformInfo *TTI;
   LazyValueInfo *LVI;
   AAResults *AA;
   DomTreeUpdater *DTU;
@@ -99,9 +101,9 @@
   JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
 
   // Glue for old PM.
-  bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI,
-               AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData,
-               std::unique_ptr<BlockFrequencyInfo> BFI,
+  bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+               LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU,
+               bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI,
                std::unique_ptr<BranchProbabilityInfo> BPI);
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -331,7 +331,7 @@
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
-  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
+  bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
                               std::move(BFI), std::move(BPI));
   if (PrintLVIAfterJumpThreading) {
     dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -360,7 +360,7 @@
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
-  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
+  bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
                          std::move(BFI), std::move(BPI));
 
   if (PrintLVIAfterJumpThreading) {
@@ -377,12 +377,14 @@
 }
 
 bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
-                                LazyValueInfo *LVI_, AliasAnalysis *AA_,
-                                DomTreeUpdater *DTU_, bool HasProfileData_,
+                                TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
+                                AliasAnalysis *AA_, DomTreeUpdater *DTU_,
+                                bool HasProfileData_,
                                 std::unique_ptr<BlockFrequencyInfo> BFI_,
                                 std::unique_ptr<BranchProbabilityInfo> BPI_) {
   LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
   TLI = TLI_;
+  TTI = TTI_;
   LVI = LVI_;
   AA = AA_;
   DTU = DTU_;
@@ -514,7 +516,8 @@
 /// Return the cost of duplicating a piece of this block from first non-phi
 /// and before StopAt instruction to thread across it. Stop scanning the block
 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
-static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
+                                             BasicBlock *BB,
                                              Instruction *StopAt,
                                              unsigned Threshold) {
   assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
@@ -550,26 +553,21 @@
     if (Size > Threshold)
       return Size;
 
-    // Debugger intrinsics don't incur code size.
-    if (isa<DbgInfoIntrinsic>(I)) continue;
-
-    // Pseudo-probes don't incur code size.
-    if (isa<PseudoProbeInst>(I))
-      continue;
-
-    // If this is a pointer->pointer bitcast, it is free.
-    if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
-      continue;
-
-    // Freeze instruction is free, too.
-    if (isa<FreezeInst>(I))
-      continue;
-
     // Bail out if this instruction gives back a token type, it is not possible
     // to duplicate it if it is used outside this BB.
     if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
       return ~0U;
 
+    // Blocks with NoDuplicate are modelled as having infinite cost, so they
+    // are never duplicated.
+    if (const CallInst *CI = dyn_cast<CallInst>(I))
+      if (CI->cannotDuplicate() || CI->isConvergent())
+        return ~0U;
+
+    if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency)
+            == TargetTransformInfo::TCC_Free)
+      continue;
+
     // All other instructions count for at least one unit.
     ++Size;
 
@@ -578,11 +576,7 @@
     // as having cost of 2 total, and if they are a vector intrinsic, we model
     // them as having cost 1.
     if (const CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (CI->cannotDuplicate() || CI->isConvergent())
-        // Blocks with NoDuplicate are modelled as having infinite cost, so they
-        // are never duplicated.
-        return ~0U;
-      else if (!isa<IntrinsicInst>(CI))
+      if (!isa<IntrinsicInst>(CI))
         Size += 3;
       else if (!CI->getType()->isVectorTy())
         Size += 1;
@@ -2234,10 +2228,10 @@
   }
 
   // Compute the cost of duplicating BB and PredBB.
-  unsigned BBCost =
-      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+  unsigned BBCost = getJumpThreadDuplicationCost(
+      TTI, BB, BB->getTerminator(), BBDupThreshold);
   unsigned PredBBCost = getJumpThreadDuplicationCost(
-      PredBB, PredBB->getTerminator(), BBDupThreshold);
+      TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
 
   // Give up if costs are too high.  We need to check BBCost and PredBBCost
   // individually before checking their sum because getJumpThreadDuplicationCost
@@ -2345,8 +2339,8 @@
     return false;
   }
 
-  unsigned JumpThreadCost =
-      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+  unsigned JumpThreadCost = getJumpThreadDuplicationCost(
+      TTI, BB, BB->getTerminator(), BBDupThreshold);
   if (JumpThreadCost > BBDupThreshold) {
     LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                       << "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -2614,8 +2608,8 @@
     return false;
   }
 
-  unsigned DuplicationCost =
-      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+  unsigned DuplicationCost = getJumpThreadDuplicationCost(
+      TTI, BB, BB->getTerminator(), BBDupThreshold);
   if (DuplicationCost > BBDupThreshold) {
     LLVM_DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
                       << "' - Cost is too high: " << DuplicationCost << "\n");
@@ -3031,7 +3025,8 @@
 
   ValueToValueMapTy UnguardedMapping, GuardedMapping;
   Instruction *AfterGuard = Guard->getNextNode();
-  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+  unsigned Cost =
+      getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
   if (Cost > BBDupThreshold)
     return false;
   // Duplicate all instructions before the guard and the guard itself to the
diff --git a/llvm/test/Transforms/JumpThreading/free_instructions.ll b/llvm/test/Transforms/JumpThreading/free_instructions.ll
--- a/llvm/test/Transforms/JumpThreading/free_instructions.ll
+++ b/llvm/test/Transforms/JumpThreading/free_instructions.ll
@@ -5,26 +5,28 @@
 ; the jump threading threshold, as everything else are free instructions.
 define i32 @free_instructions(i1 %c, i32* %p) {
 ; CHECK-LABEL: @free_instructions(
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK:       if:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF2:%.*]], label [[ELSE2:%.*]]
+; CHECK:       if2:
 ; CHECK-NEXT:    store i32 -1, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    br label [[JOIN:%.*]]
-; CHECK:       else:
-; CHECK-NEXT:    store i32 -2, i32* [[P]], align 4
-; CHECK-NEXT:    br label [[JOIN]]
-; CHECK:       join:
 ; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
 ; CHECK-NEXT:    store i32 1, i32* [[P]], align 4, !noalias !0
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ]
 ; CHECK-NEXT:    store i32 2, i32* [[P]], align 4
+; CHECK-NEXT:    [[P21:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT:    [[P32:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P21]])
+; CHECK-NEXT:    [[P43:%.*]] = bitcast i8* [[P32]] to i32*
+; CHECK-NEXT:    store i32 3, i32* [[P43]], align 4, !invariant.group !3
+; CHECK-NEXT:    ret i32 0
+; CHECK:       else2:
+; CHECK-NEXT:    store i32 -2, i32* [[P]], align 4
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]])
+; CHECK-NEXT:    store i32 1, i32* [[P]], align 4, !noalias !4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ]
+; CHECK-NEXT:    store i32 2, i32* [[P]], align 4
 ; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
 ; CHECK-NEXT:    [[P3:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P2]])
 ; CHECK-NEXT:    [[P4:%.*]] = bitcast i8* [[P3]] to i32*
 ; CHECK-NEXT:    store i32 3, i32* [[P4]], align 4, !invariant.group !3
-; CHECK-NEXT:    br i1 [[C]], label [[IF2:%.*]], label [[ELSE2:%.*]]
-; CHECK:       if2:
-; CHECK-NEXT:    ret i32 0
-; CHECK:       else2:
 ; CHECK-NEXT:    ret i32 1
 ;
   br i1 %c, label %if, label %else
diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll
--- a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll
@@ -32,13 +32,10 @@
 ; ASSUMPTIONS-OFF-NEXT:    br label [[COMMON_RET]]
 ;
 ; ASSUMPTIONS-ON-LABEL: @caller1(
-; ASSUMPTIONS-ON-NEXT:    br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE1:%.*]]
-; ASSUMPTIONS-ON:       false1:
-; ASSUMPTIONS-ON-NEXT:    store volatile i64 1, i64* [[PTR:%.*]], align 4
-; ASSUMPTIONS-ON-NEXT:    br label [[COMMON_RET]]
+; ASSUMPTIONS-ON-NEXT:    br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]]
 ; ASSUMPTIONS-ON:       common.ret:
-; ASSUMPTIONS-ON-NEXT:    [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE1]] ], [ 2, [[TMP0:%.*]] ]
-; ASSUMPTIONS-ON-NEXT:    call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ]
+; ASSUMPTIONS-ON-NEXT:    [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ]
+; ASSUMPTIONS-ON-NEXT:    call void @llvm.assume(i1 true) [ "align"(i64* [[PTR:%.*]], i64 8) ]
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 0, i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
@@ -47,6 +44,9 @@
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 [[DOTSINK]], i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    ret void
+; ASSUMPTIONS-ON:       false2:
+; ASSUMPTIONS-ON-NEXT:    store volatile i64 1, i64* [[PTR]], align 4
+; ASSUMPTIONS-ON-NEXT:    br label [[COMMON_RET]]
 ;
   br i1 %c, label %true1, label %false1