diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -387,6 +387,7 @@
   bool simplifyCallSite(Function *F, CallBase &Call);
   template <typename Callable>
   bool simplifyInstruction(Instruction &I, Callable Evaluate);
+  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
   /// Return true if the given argument to the function being considered for
@@ -1531,6 +1532,27 @@
   return true;
 }
 
+/// Try to simplify a call to llvm.is.constant.
+///
+/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
+/// we expect calls of this specific intrinsic to be infrequent.
+///
+/// FIXME: We could look at CB's parent's caller to determine whether inlining
+/// CB's parent into that caller would change how this call to
+/// llvm.is.constant evaluates; that caller is the function containing the
+/// CallAnalyzer member CandidateCall, which is the call to CB's parent.
+bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
+  Value *Arg = CB.getArgOperand(0);
+  auto *C = dyn_cast<Constant>(Arg);
+
+  if (!C)
+    C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
+
+  Type *RT = CB.getFunctionType()->getReturnType();
+  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
+  return true;
+}
+
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
@@ -2154,6 +2176,8 @@
     if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
       SROAArgValues[II] = SROAArg;
     return true;
+  case Intrinsic::is_constant:
+    return simplifyIntrinsicCallIsConstant(Call);
   }
 }
 
diff --git a/llvm/test/Transforms/Inline/call-intrinsic-is-constant.ll b/llvm/test/Transforms/Inline/call-intrinsic-is-constant.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/call-intrinsic-is-constant.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes=inline -S -inline-threshold=20 | FileCheck %s
+
+; In this test we basically have the following C code:
+
+; long hweight_long_w;
+; int hweight_long (void) {
+;   if (__builtin_constant_p(hweight_long_w))
+;     // Lots of code that would fold away if hweight_long_w were constant,
+;     // but no amount of inlining will make it so.
+;   else
+;     // A little bit of code.
+; }
+; int __nodes_weight (void) { hweight_long(); }
+; int amd_numa_init (void) { __nodes_weight(); }
+
+; The point of this test is that __builtin_constant_p (which is lowered to a
+; call to the intrinsic @llvm.is.constant.i64) does not hinder inlining
+; hweight_long all the way up into amd_numa_init.
+
+@hweight_long_w = external dso_local global i64, align 8
+
+; Testing the InlineCost of the call to @llvm.is.constant.i64.
+; Do not change the linkage of @hweight_long; that will give it a severe
+; discount in cost (LastCallToStaticBonus).
+define i32 @hweight_long() {
+; CHECK-LABEL: @hweight_long(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* @hweight_long_w, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.constant.i64(i64 [[TMP0]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP0]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[AND]], [[TMP0]]
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (i64, ...) bitcast (i32 (...)* @__arch_hweight64 to i32 (i64, ...)*)(i64 [[TMP0]])
+; CHECK-NEXT:    [[CONV286:%.*]] = sext i32 [[CALL]] to i64
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i64 [ [[ADD]], [[COND_TRUE]] ], [ [[CONV286]], [[COND_FALSE]] ]
+; CHECK-NEXT:    [[CONV287:%.*]] = trunc i64 [[COND]] to i32
+; CHECK-NEXT:    ret i32 [[CONV287]]
+;
+entry:
+  %0 = load i64, i64* @hweight_long_w, align 8
+  %1 = call i1 @llvm.is.constant.i64(i64 %0)
+  br i1 %1, label %cond.true, label %cond.false
+
+cond.true:
+  %and = and i64 %0, 1
+  %add = add nsw i64 %and, %0
+  br label %cond.end
+
+cond.false:
+  %call = call i32 (i64, ...) bitcast (i32 (...)* @__arch_hweight64 to i32 (i64, ...)*)(i64 %0)
+  %conv286 = sext i32 %call to i64
+  br label %cond.end
+
+cond.end:
+  %cond = phi i64 [ %add, %cond.true ], [ %conv286, %cond.false ]
+  %conv287 = trunc i64 %cond to i32
+  ret i32 %conv287
+}
+
+; Do not change the linkage of @__nodes_weight; that will give it a severe
+; discount in cost (LastCallToStaticBonus).
+define i32 @__nodes_weight() {
+; CHECK-LABEL: @__nodes_weight(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__TRANS_TMP_1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* @hweight_long_w, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.constant.i64(i64 [[TMP0]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[COND_TRUE_I:%.*]], label [[COND_FALSE_I:%.*]]
+; CHECK:       cond.true.i:
+; CHECK-NEXT:    [[AND_I:%.*]] = and i64 [[TMP0]], 1
+; CHECK-NEXT:    [[ADD_I:%.*]] = add nsw i64 [[AND_I]], [[TMP0]]
+; CHECK-NEXT:    br label [[HWEIGHT_LONG_EXIT:%.*]]
+; CHECK:       cond.false.i:
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i32 (i64, ...) bitcast (i32 (...)* @__arch_hweight64 to i32 (i64, ...)*)(i64 [[TMP0]])
+; CHECK-NEXT:    [[CONV286_I:%.*]] = sext i32 [[CALL_I]] to i64
+; CHECK-NEXT:    br label [[HWEIGHT_LONG_EXIT]]
+; CHECK:       hweight_long.exit:
+; CHECK-NEXT:    [[COND_I:%.*]] = phi i64 [ [[ADD_I]], [[COND_TRUE_I]] ], [ [[CONV286_I]], [[COND_FALSE_I]] ]
+; CHECK-NEXT:    [[CONV287_I:%.*]] = trunc i64 [[COND_I]] to i32
+; CHECK-NEXT:    store i32 [[CONV287_I]], i32* [[__TRANS_TMP_1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[__TRANS_TMP_1]], align 4
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %__trans_tmp_1 = alloca i32, align 4
+  %call = call i32 @hweight_long()
+  store i32 %call, i32* %__trans_tmp_1, align 4
+  %0 = load i32, i32* %__trans_tmp_1, align 4
+  ret i32 %0
+}
+
+; The real goal of this test is that @hweight_long gets fully inlined here.
+define dso_local i32 @amd_numa_init() {
+; CHECK-LABEL: @amd_numa_init(
+; CHECK-NEXT:    [[__TRANS_TMP_1_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[__TRANS_TMP_1_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* @hweight_long_w, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.is.constant.i64(i64 [[TMP2]])
+; CHECK-NEXT:    br i1 [[TMP3]], label [[COND_TRUE_I_I:%.*]], label [[COND_FALSE_I_I:%.*]]
+; CHECK:       cond.true.i.i:
+; CHECK-NEXT:    [[AND_I_I:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT:    [[ADD_I_I:%.*]] = add nsw i64 [[AND_I_I]], [[TMP2]]
+; CHECK-NEXT:    br label [[__NODES_WEIGHT_EXIT:%.*]]
+; CHECK:       cond.false.i.i:
+; CHECK-NEXT:    [[CALL_I_I:%.*]] = call i32 (i64, ...) bitcast (i32 (...)* @__arch_hweight64 to i32 (i64, ...)*)(i64 [[TMP2]])
+; CHECK-NEXT:    [[CONV286_I_I:%.*]] = sext i32 [[CALL_I_I]] to i64
+; CHECK-NEXT:    br label [[__NODES_WEIGHT_EXIT]]
+; CHECK:       __nodes_weight.exit:
+; CHECK-NEXT:    [[COND_I_I:%.*]] = phi i64 [ [[ADD_I_I]], [[COND_TRUE_I_I]] ], [ [[CONV286_I_I]], [[COND_FALSE_I_I]] ]
+; CHECK-NEXT:    [[CONV287_I_I:%.*]] = trunc i64 [[COND_I_I]] to i32
+; CHECK-NEXT:    store i32 [[CONV287_I_I]], i32* [[__TRANS_TMP_1_I]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[__TRANS_TMP_1_I]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[__TRANS_TMP_1_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP5]])
+; CHECK-NEXT:    br label [[IF_END7:%.*]]
+; CHECK:       if.end7:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[RETVAL]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %retval = alloca i32, align 4
+  %call6 = call i32 @__nodes_weight()
+  br label %if.end7
+
+if.end7:
+  %load = load i32, i32* %retval, align 4
+  ret i32 %load
+}
+
+declare i1 @llvm.is.constant.i64(i64)
+declare dso_local i32 @__arch_hweight64(...)
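
For context, here is a minimal, self-contained C sketch of the kernel-style pattern this change targets. It mirrors the hweight_long/__nodes_weight shape in the test above, but the names (popcount_long, nodes_weight, global_w) and the expressions are illustrative only; they are not taken from this patch or from the kernel sources.

/* Sketch: __builtin_constant_p guarding an expensive but constant-foldable
 * path, with a cheap fallback. Compiles with GCC or Clang. */
static unsigned long global_w;

static inline int popcount_long(unsigned long w) {
  if (__builtin_constant_p(w))
    /* A large expression that only folds if w is a compile-time constant.
     * A value loaded from a global never satisfies __builtin_constant_p,
     * so this branch is dead here; before this change the cost analysis
     * still charged for it. */
    return (int)((w & 1) + ((w >> 1) & 1) + ((w >> 2) & 1) + ((w >> 3) & 1));
  /* Cheap fallback, typically an out-of-line helper. */
  return __builtin_popcountl(w);
}

int nodes_weight(void) {
  return popcount_long(global_w);
}

With this patch, the llvm.is.constant call emitted for __builtin_constant_p(w) is simplified to false during inline cost analysis whenever the analyzer cannot prove the operand constant, so the dead expensive branch should no longer count against the threshold when deciding whether to inline popcount_long into nodes_weight.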