diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -366,6 +366,9 @@
 ///                     size_t dstsize)
 TLI_DEFINE_ENUM_INTERNAL(memccpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__memccpy_chk")
+/// int __memcmpeq(const void *s1, const void *s2, size_t n);
+TLI_DEFINE_ENUM_INTERNAL(memcmpeq)
+TLI_DEFINE_STRING_INTERNAL("__memcmpeq")
 /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(memcpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__memcpy_chk")
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -85,10 +85,10 @@
     return SDValue();
   }

-  /// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
-  /// faster than a libcall. The first returned SDValue is the result of the
-  /// memcmp and the second is the chain. Both SDValues can be null if a normal
-  /// libcall should be used.
+  /// Emit target-specific code that performs a memcmp/bcmp/__memcmpeq, in
+  /// cases where that is faster than a libcall. The first returned SDValue is
+  /// the result of the memcmp and the second is the chain. Both SDValues can
+  /// be null if a normal libcall should be used.
   virtual std::pair<SDValue, SDValue>
   EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
                           SDValue Op1, SDValue Op2, SDValue Op3,
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -147,6 +147,10 @@
   Value *emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
                   const DataLayout &DL, const TargetLibraryInfo *TLI);

+  /// Emit a call to the __memcmpeq function.
+  Value *emitMemCmpEq(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
+                      const DataLayout &DL, const TargetLibraryInfo *TLI);
+
   /// Emit a call to the memccpy function.
   Value *emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
                      IRBuilderBase &B, const TargetLibraryInfo *TLI);
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -173,6 +173,7 @@
   Value *optimizeMemRChr(CallInst *CI, IRBuilderBase &B);
   Value *optimizeMemCmp(CallInst *CI, IRBuilderBase &B);
   Value *optimizeBCmp(CallInst *CI, IRBuilderBase &B);
+  Value *optimizeMemCmpEq(CallInst *CI, IRBuilderBase &B);
   Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilderBase &B);
   Value *optimizeMemCCpy(CallInst *CI, IRBuilderBase &B);
   Value *optimizeMemPCpy(CallInst *CI, IRBuilderBase &B);
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -33,6 +33,11 @@
                clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
                           "Intel SVML library")));

+static cl::opt<bool> WithBuiltinMemcmpeq(
+    "with-builtin-memcmpeq", cl::Hidden, cl::init(false),
+    cl::desc("Enable emitting __memcmpeq (as a replacement for "
+             "boolean uses of memcmp/bcmp)"));
+
 StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
 #define TLI_DEFINE_STRING
@@ -67,6 +72,13 @@
   return TT.isOSFreeBSD() || TT.isOSSolaris();
 }

+// No target is known to provide __memcmpeq unconditionally yet, so it is
+// only reported as available when explicitly requested via the
+// -with-builtin-memcmpeq flag.
+static bool hasMemcmpeq(const Triple &TT) {
+  return WithBuiltinMemcmpeq;
+}
+
 static bool isCallingConvCCompatible(CallingConv::ID CC, StringRef TT,
                                      FunctionType *FuncTy) {
   switch (CC) {
@@ -204,6 +216,12 @@
   if (!hasBcmp(T))
     TLI.setUnavailable(LibFunc_bcmp);

+  // __memcmpeq is only marked available when it has been explicitly
+  // requested via -with-builtin-memcmpeq (see hasMemcmpeq above); otherwise
+  // treat it as unavailable on every target.
+  if (!hasMemcmpeq(T))
+    TLI.setUnavailable(LibFunc_memcmpeq);
+
   if (T.isMacOSX() && T.getArch() == Triple::x86 &&
       !T.isMacOSXVersionLT(10, 7)) {
     // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
@@ -574,6 +592,7 @@
     TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
     TLI.setUnavailable(LibFunc_under_IO_getc);
     TLI.setUnavailable(LibFunc_under_IO_putc);
+    TLI.setUnavailable(LibFunc_memcmpeq);
     // But, Android and musl have memalign.
     if (!T.isAndroid() && !T.isMusl())
       TLI.setUnavailable(LibFunc_memalign);
@@ -1191,6 +1210,7 @@
   case LibFunc_aligned_alloc:
     return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
   case LibFunc_bcopy:
+  case LibFunc_memcmpeq:
   case LibFunc_bcmp:
     return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy());
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1316,6 +1316,7 @@
   // on any target: A size_t argument (which may be an i32 on some targets)
   // should not trigger the assert below.
   case LibFunc_bcmp:
+  case LibFunc_memcmpeq:
   case LibFunc_calloc:
   case LibFunc_fwrite:
   case LibFunc_malloc:
@@ -1556,6 +1557,16 @@
                      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }

+Value *llvm::emitMemCmpEq(Value *Ptr1, Value *Ptr2, Value *Len,
+                          IRBuilderBase &B, const DataLayout &DL,
+                          const TargetLibraryInfo *TLI) {
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_memcmpeq, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
+
 Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
                          IRBuilderBase &B, const TargetLibraryInfo *TLI) {
   return emitLibCall(
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1181,7 +1181,9 @@
   return nullptr;
 }

-// Most simplifications for memcmp also apply to bcmp.
+// Most simplifications for memcmp also apply to its equality-only variants,
+// such as bcmp and __memcmpeq, which only report whether the memory regions
+// are equal rather than how they are ordered.
 Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
                                                    IRBuilderBase &B) {
   Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
@@ -1212,20 +1214,46 @@
     return V;

   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
-  // bcmp can be more efficient than memcmp because it only has to know that
-  // there is a difference, not how different one is to the other.
-  if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
+  // bcmp and __memcmpeq can be more efficient than memcmp because they only
+  // need to know whether the buffers differ, not how they are ordered.
+
+  // Note: We check both __memcmpeq and bcmp here (rather than having one
+  // forward to the other) because only one of them may be available.
+  if (((isLibFuncEmittable(M, TLI, LibFunc_bcmp) && TLI->has(LibFunc_bcmp)) ||
+       (isLibFuncEmittable(M, TLI, LibFunc_memcmpeq) &&
+        TLI->has(LibFunc_memcmpeq))) &&
       isOnlyUsedInZeroEqualityComparison(CI)) {
     Value *LHS = CI->getArgOperand(0);
     Value *RHS = CI->getArgOperand(1);
     Value *Size = CI->getArgOperand(2);
-    return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
+    // Prefer __memcmpeq if it is available.
+    if (isLibFuncEmittable(M, TLI, LibFunc_memcmpeq) &&
+        TLI->has(LibFunc_memcmpeq)) {
+      return copyFlags(*CI, emitMemCmpEq(LHS, RHS, Size, B, DL, TLI));
+    } else {
+      return copyFlags(*CI, emitBCmp(LHS, RHS, Size, B, DL, TLI));
+    }
   }

   return nullptr;
 }

 Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
+  if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
+    return V;
+
+  // Replace calls to bcmp with __memcmpeq when __memcmpeq is available.
+  if (isLibFuncEmittable(M, TLI, LibFunc_memcmpeq) && TLI->has(LibFunc_memcmpeq)) {
+    Value *LHS = CI->getArgOperand(0);
+    Value *RHS = CI->getArgOperand(1);
+    Value *Size = CI->getArgOperand(2);
+    return copyFlags(*CI, emitMemCmpEq(LHS, RHS, Size, B, DL, TLI));
+  }
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCmpEq(CallInst *CI, IRBuilderBase &B) {
   return optimizeMemCmpBCmpCommon(CI, B);
 }

@@ -3053,6 +3081,8 @@
     return optimizeMemRChr(CI, Builder);
   case LibFunc_bcmp:
     return optimizeBCmp(CI, Builder);
+  case LibFunc_memcmpeq:
+    return optimizeMemCmpEq(CI, Builder);
   case LibFunc_memcmp:
     return optimizeMemCmp(CI, Builder);
   case LibFunc_memcpy:
diff --git a/llvm/test/Transforms/InstCombine/memcmp-1.ll b/llvm/test/Transforms/InstCombine/memcmp-1.ll
--- a/llvm/test/Transforms/InstCombine/memcmp-1.ll
+++ b/llvm/test/Transforms/InstCombine/memcmp-1.ll
@@ -2,6 +2,7 @@
 ;
 ; RUN: opt < %s -passes=instcombine -S | FileCheck --check-prefix=CHECK --check-prefix=NOBCMP %s
 ; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=CHECK --check-prefix=BCMP %s
+; RUN: opt < %s -passes=instcombine --with-builtin-memcmpeq -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=CHECK --check-prefix=MEMCMPEQ %s

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"

@@ -144,6 +145,11 @@
 ; BCMP-NEXT:    [[CALL:%.*]] = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 %size)
 ; BCMP-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; BCMP-NEXT:    ret i1 [[CMP]]
+;
+; MEMCMPEQ-LABEL: @test_simplify10(
+; MEMCMPEQ-NEXT:    [[CALL:%.*]] = call i32 @__memcmpeq(i8* %mem1, i8* %mem2, i32 %size)
+; MEMCMPEQ-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; MEMCMPEQ-NEXT:    ret i1 [[CMP]]
 ;
   %call = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 %size)
   %cmp = icmp eq i32 %call, 0
diff --git a/llvm/test/Transforms/InstCombine/strcmp-1.ll b/llvm/test/Transforms/InstCombine/strcmp-1.ll
--- a/llvm/test/Transforms/InstCombine/strcmp-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcmp-1.ll
@@ -2,6 +2,7 @@
 ; Test that the strcmp library call simplifier works correctly.
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s --check-prefix=NOBCMP
 ; RUN: opt < %s -passes=instcombine -mtriple=unknown-unknown-linux-gnu -S | FileCheck %s --check-prefix=BCMP
+; RUN: opt < %s -passes=instcombine --with-builtin-memcmpeq -mtriple=unknown-unknown-linux-gnu -S | FileCheck %s --check-prefix=MEMCMPEQ

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

@@ -140,6 +141,12 @@
 ; BCMP-NEXT:    [[BCMP:%.*]] = call i32 @bcmp(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* noundef nonnull dereferenceable(5) [[STR2]], i32 5)
 ; BCMP-NEXT:    [[RES:%.*]] = icmp eq i32 [[BCMP]], 0
 ; BCMP-NEXT:    ret i1 [[RES]]
+;
+; MEMCMPEQ-LABEL: @test7(
+; MEMCMPEQ-NEXT:    [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
+; MEMCMPEQ-NEXT:    [[MEMCMPEQ:%.*]] = call i32 @__memcmpeq(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* noundef nonnull dereferenceable(5) [[STR2]], i32 5)
+; MEMCMPEQ-NEXT:    [[RES:%.*]] = icmp eq i32 [[MEMCMPEQ]], 0
+; MEMCMPEQ-NEXT:    ret i1 [[RES]]
 ;
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -234,6 +234,7 @@
       "declare x86_fp80 @logl(x86_fp80)\n"
       "declare i8* @malloc(i64)\n"
       "declare i8* @memccpy(i8*, i8*, i32, i64)\n"
+      "declare i32 @__memcmpeq(i8*, i8*, i64)\n"
       "declare i8* @memchr(i8*, i32, i64)\n"
       "declare i32 @memcmp(i8*, i8*, i64)\n"
       "declare i8* @memcpy(i8*, i8*, i64)\n"
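
For reviewers, a minimal end-to-end sketch of the rewrite this patch enables. It mirrors the test_simplify10 pattern from memcmp-1.ll; the function name @equal_only and the input file name are illustrative only, and the transform fires only when the hidden -with-builtin-memcmpeq flag (added by this patch) marks __memcmpeq as available:

; Run with (input file name is arbitrary):
;   opt -passes=instcombine --with-builtin-memcmpeq -mtriple=x86_64-unknown-linux-gnu -S example.ll
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"

declare i32 @memcmp(i8*, i8*, i32)

; The memcmp result is only compared against zero, so the full three-way
; comparison is not needed.
define i1 @equal_only(i8* %mem1, i8* %mem2, i32 %size) {
  %call = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 %size)
  %cmp = icmp eq i32 %call, 0
  ret i1 %cmp
}

; Expected output (matching the MEMCMPEQ check lines added above): the call is
; replaced by the cheaper equality-only routine, while the icmp stays in place:
;   %call = call i32 @__memcmpeq(i8* %mem1, i8* %mem2, i32 %size)
;   %cmp = icmp eq i32 %call, 0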