Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -295,31 +295,69 @@
   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
 }
 
+// Helper to transform memchr(S, C, N) == S to N && *S == C and, when
+// NBytes is null, strchr(S, C) to *S == C.  A precondition of the function
+// is that either S is dereferenceable or the value of N is nonzero.
+static Value *memChrToCharCompare(CallInst *CI, Value *NBytes,
+                                  IRBuilderBase &B, const DataLayout &DL) {
+  Value *Src = CI->getArgOperand(0);
+  Value *CharVal = CI->getArgOperand(1);
+
+  // Fold memchr(A, C, N) == A to N && *A == C.
+  Type *CharTy = B.getInt8Ty();
+  Value *Char0 = B.CreateLoad(CharTy, Src);
+  CharVal = B.CreateTrunc(CharVal, CharTy);
+  Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
+
+  if (NBytes) {
+    Value *Zero = ConstantInt::get(NBytes->getType(), 0);
+    Value *And = B.CreateICmpNE(NBytes, Zero);
+    Cmp = B.CreateLogicalAnd(And, Cmp);
+  }
+
+  Value *NullPtr = Constant::getNullValue(CI->getType());
+  return B.CreateSelect(Cmp, Src, NullPtr);
+}
+
 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
-  Function *Callee = CI->getCalledFunction();
-  FunctionType *FT = Callee->getFunctionType();
   Value *SrcStr = CI->getArgOperand(0);
+  Value *CharVal = CI->getArgOperand(1);
   annotateNonNullNoUndefBasedOnAccess(CI, 0);
 
+  if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+    return memChrToCharCompare(CI, nullptr, B, DL);
+
   // If the second operand is non-constant, see if we can compute the length
   // of the input string and turn this into memchr.
-  ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
   if (!CharC) {
     uint64_t Len = GetStringLength(SrcStr);
     if (Len)
       annotateDereferenceableBytes(CI, 0, Len);
     else
       return nullptr;
+
+    Function *Callee = CI->getCalledFunction();
+    FunctionType *FT = Callee->getFunctionType();
     if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
       return nullptr;
 
     return copyFlags(
         *CI,
-        emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+        emitMemChr(SrcStr, CharVal, // include nul.
                    ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
                    B, DL, TLI));
   }
 
+  if (CharC->isZero()) {
+    Value *NullPtr = Constant::getNullValue(CI->getType());
+    if (isOnlyUsedInEqualityComparison(CI, NullPtr))
+      // Pre-empt the transformation to strlen below and fold
+      // strchr(A, '\0') == null to false.
+      return B.CreateIntToPtr(B.getTrue(), CI->getType());
+  }
+
   // Otherwise, the character is a constant, see if the first argument is
   // a string literal.  If so, we can constant fold.
   StringRef Str;
@@ -1014,8 +1052,12 @@
 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
   Value *SrcStr = CI->getArgOperand(0);
   Value *Size = CI->getArgOperand(2);
-  if (isKnownNonZero(Size, DL))
+
+  if (isKnownNonZero(Size, DL)) {
     annotateNonNullNoUndefBasedOnAccess(CI, 0);
+    if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+      return memChrToCharCompare(CI, Size, B, DL);
+  }
 
   Value *CharVal = CI->getArgOperand(1);
   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
@@ -1105,9 +1147,16 @@
     return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
   }
 
-  if (!LenC)
+  if (!LenC) {
+    if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+      // S is dereferenceable so it's safe to load from it and fold
+      // memchr(S, C, N) == S to N && *S == C for any C and N.
+      // TODO: This is safe even for nonconstant S.
+      return memChrToCharCompare(CI, Size, B, DL);
+
+    // From now on we need a constant length and constant array.
     return nullptr;
+  }
 
   // If the char is variable but the input str and length are not we can turn
   // this memchr call into a simple bit field test. Of course this only works
Index: llvm/test/Transforms/InstCombine/memchr-11.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/memchr-11.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the result of memchr calls used in equality expressions
+; with either the first argument or null are optimally folded.
+
+declare i8* @memchr(i8*, i32, i64)
+
+
+@a5 = constant [5 x i8] c"12345"
+
+; Fold memchr(a5, c, 5) == a5 to *a5 == c.
+
+define i1 @fold_memchr_a_c_5_eq_a(i32 %c) {
+; CHECK-LABEL: @fold_memchr_a_c_5_eq_a(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 49
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 5)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Fold memchr(a5, c, n) == a5 to n && *a5 == c.  Unlike the case where
+; the first argument is an arbitrary (possibly past-the-end) pointer,
+; this is safe because a5 is dereferenceable.
+
+define i1 @fold_memchr_a_c_n_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memchr_a_c_n_eq_a(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 49
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[N:%.*]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i1 [[CHAR0CMP]], i1 false
+; CHECK-NEXT:    ret i1 [[TMP3]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Do not fold memchr(a5 + i, c, n).
+
+define i1 @call_memchr_api_c_n_eq_a(i64 %i, i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memchr_api_c_n_eq_a(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr [5 x i8], [5 x i8]* @a5, i64 0, i64 [[I:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @memchr(i8* [[P]], i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[Q]], [[P]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i64 0, i64 %i
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Fold memchr(s, c, 15) == s to *s == c.
+
+define i1 @fold_memchr_s_c_15_eq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_memchr_s_c_15_eq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 15)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold memchr(s, c, 17) != s to *s != c.
+
+define i1 @fold_memchr_s_c_17_neq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_memchr_s_c_17_neq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp ne i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 17)
+  %cmp = icmp ne i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold memchr(s, c, n) == s to *s == c for nonzero n.
+
+define i1 @fold_memchr_s_c_nz_eq_s(i8* %s, i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memchr_s_c_nz_eq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %nz = or i64 %n, 1
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 %nz)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; But do not fold memchr(s, c, n) as above when n might be zero.  The fold
+; is equivalent to N && *S == C only with a short-circuiting AND; without
+; it the load could read a byte just past the end of an array.
+
+define i1 @call_memchr_s_c_n_eq_s(i8* %s, i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memchr_s_c_n_eq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memchr(i8* [[S:%.*]], i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
Index: llvm/test/Transforms/InstCombine/memrchr-8.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/memrchr-8.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the result of memrchr calls used in equality expressions
+; with the first argument aren't folded like the corresponding calls
+; to memchr might be.
+; Folding of equality expressions with the first argument plus the bound
+; minus 1, i.e., memrchr(S, C, N) == S + N - 1 to N && S[N - 1] == C,
+; is not implemented.
+
+declare i8* @memrchr(i8*, i32, i64)
+
+
+@a5 = constant [5 x i8] c"12345";
+
+; Do not fold memrchr(a5, c, 9) == a5.  The corresponding call to memchr
+; is folded, so this test verifies that the memrchr folder doesn't make
+; the wrong assumption.  The bound of 9 tries to avoid having to adjust
+; the test if the call is folded into a series of ORs as in D128011.
+
+define i1 @call_memrchr_a_c_9_eq_a(i32 %c) {
+; CHECK-LABEL: @call_memrchr_a_c_9_eq_a(
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(9) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i32 [[C:%.*]], i64 9)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[Q]], getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0)
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memrchr(i8* %p, i32 %c, i64 9)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(a5, c, n).
+
+define i1 @call_memrchr_a_c_n_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memrchr_a_c_n_eq_a(
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @memrchr(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[Q]], getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0)
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memrchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(s, c, 17).
+
+define i1 @call_memrchr_s_c_17_eq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @call_memrchr_s_c_17_eq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(17) [[S:%.*]], i32 [[C:%.*]], i64 17)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memrchr(i8* %s, i32 %c, i64 17)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(s, c, 7).
+
+define i1 @call_memrchr_s_c_9_neq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @call_memrchr_s_c_9_neq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(7) [[S:%.*]], i32 [[C:%.*]], i64 7)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memrchr(i8* %s, i32 %c, i64 7)
+  %cmp = icmp ne i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(s, c, n).
+
+define i1 @fold_memrchr_s_c_n_eq_s(i8* %s, i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memrchr_s_c_n_eq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memrchr(i8* [[S:%.*]], i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memrchr(i8* %s, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
Index: llvm/test/Transforms/InstCombine/strchr-4.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/strchr-4.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the result of strchr calls used in equality expressions
+; with either the first argument or null are optimally folded.
+
+declare i8* @strchr(i8*, i32)
+
+
+; Fold strchr(s, c) == s to *s == c.
+
+define i1 @fold_strchr_s_c_eq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_strchr_s_c_eq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @strchr(i8* %s, i32 %c)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold strchr(s, c) != s to *s != c.
+
+define i1 @fold_strchr_s_c_neq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_strchr_s_c_neq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp ne i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @strchr(i8* %s, i32 %c)
+  %cmp = icmp ne i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold strchr(s, '\0') == null to false.  (A string must be nul-terminated,
+; otherwise the call would read past the end of the array.)
+
+define i1 @fold_strchr_s_nul_eqz(i8* %s) {
+; CHECK-LABEL: @fold_strchr_s_nul_eqz(
+; CHECK-NEXT:    ret i1 false
+;
+  %p = call i8* @strchr(i8* %s, i32 0)
+  %cmp = icmp eq i8* %p, null
+  ret i1 %cmp
+}
+
+
+; Fold strchr(s, '\0') != null to true.
+
+define i1 @fold_strchr_s_nul_nez(i8* %s) {
+; CHECK-LABEL: @fold_strchr_s_nul_nez(
+; CHECK-NEXT:    ret i1 true
+;
+  %p = call i8* @strchr(i8* %s, i32 0)
+  %cmp = icmp ne i8* %p, null
+  ret i1 %cmp
+}
+
+
+@a5 = constant [5 x i8] c"12345";
+
+; Fold strchr(a5, c) == a5 to *a5 == c.
+
+define i1 @fold_strchr_a_c_eq_a(i32 %c) {
+; CHECK-LABEL: @fold_strchr_a_c_eq_a(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 49
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @strchr(i8* %p, i32 %c)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
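
A possible follow-up test, not part of the patch above: strchr-4.ll exercises the == form of strchr against the constant array @a5 but not the != form, which the same memChrToCharCompare fold should reduce to *a5 != c. The sketch below is hypothetical; the test name is mine, and the CHECK lines are a guess at what utils/update_test_checks.py would regenerate (they mirror @fold_strchr_s_c_neq_s and @fold_strchr_a_c_eq_a), so they should be re-derived with the script before being relied on.

; Hypothetical extra test (not in the patch): fold strchr(a5, c) != a5
; to *a5 != c, the inequality counterpart of @fold_strchr_a_c_eq_a.

define i1 @fold_strchr_a_c_neq_a(i32 %c) {
; CHECK-LABEL: @fold_strchr_a_c_neq_a(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp ne i8 [[TMP1]], 49
; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
;
  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
  %q = call i8* @strchr(i8* %p, i32 %c)
  %cmp = icmp ne i8* %q, %p
  ret i1 %cmp
}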