diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1433,44 +1433,36 @@ return B.CreateSub(LHSV, RHSV, "chardiff"); } - // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 - // TODO: The case where both inputs are constants does not need to be limited - // to legal integers or equality comparison. See block below this. - if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { - IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); - unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); - - // First, see if we can fold either argument to a constant. - Value *LHSV = nullptr; - if (auto *LHSC = dyn_cast(LHS)) { - LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo()); - LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL); - } - Value *RHSV = nullptr; - if (auto *RHSC = dyn_cast(RHS)) { - RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo()); - RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL); - } + if (!isOnlyUsedInZeroEqualityComparison(CI)) + return nullptr; - // Don't generate unaligned loads. If either source is constant data, - // alignment doesn't matter for that source because there is no load. 
- if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) && - (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) { - if (!LHSV) { - Type *LHSPtrTy = - IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); - } - if (!RHSV) { - Type *RHSPtrTy = - IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); - } - return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); - } - } + unsigned NBits = Len * 8; + if (NBits > DL.getLargestLegalIntTypeSizeInBits()) + // Limit the transformation to the size of the largest scalar register + // and for bigger sizes let ExpandMemCmp do its thing. + // TODO: Consider increasing this to something less conservative when + // not optimizing for space. + return nullptr; - return nullptr; + // memcmp(S1, S2, N) == 0 -> (*(iN*)S1 != *(iN*)S2) == 0 + + // Create an integer type NBits bits in size (which need not be a power of + // two) and use it to load from the (possibly less aligned) array. The target + // emitter will emit as many load instructions as it takes to guarantee + // suitable alignment. + Type *IntType = B.getIntNTy(NBits); + + unsigned LAS = LHS->getType()->getPointerAddressSpace(); + Type *LPtrTy = IntType->getPointerTo(LAS); + Align LAl = getKnownAlignment(LHS, DL, CI); + Value *LHSV = B.CreateAlignedLoad(IntType, B.CreateBitCast(LHS, LPtrTy), LAl); + + unsigned RAS = RHS->getType()->getPointerAddressSpace(); + Type *RPtrTy = IntType->getPointerTo(RAS); + Align RAl = getKnownAlignment(RHS, DL, CI); + Value *RHSV = B.CreateAlignedLoad(IntType, B.CreateBitCast(RHS, RPtrTy), RAl); + + return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } // Most simplifications for memcmp also apply to bcmp. 
diff --git a/llvm/test/Transforms/InstCombine/memcmp-9.ll b/llvm/test/Transforms/InstCombine/memcmp-9.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/memcmp-9.ll @@ -0,0 +1,512 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s +; +; Verify transformations of memcmp calls involving pointers to unaligned +; memory or to objects whose size isn't a power of two. +; + +target datalayout = "p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-n8:16:32:64" + +@a9 = external global [9 x i8], align 8 + +declare i32 @memcmp(i8*, i8*, i64) + +declare void @sink(i1) + + +define void @fold_memcmp_a_p(ptr %q0) { +; CHECK-LABEL: @fold_memcmp_a_p( +; CHECK-NEXT: [[LHSC:%.*]] = load i8, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[RHSC:%.*]] = load i8, ptr [[Q0:%.*]], align 1 +; CHECK-NEXT: [[EQZ1_0_1:%.*]] = icmp eq i8 [[LHSC]], [[RHSC]] +; CHECK-NEXT: call void @sink(i1 [[EQZ1_0_1]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT]]) +; CHECK-NEXT: [[TMP3:%.*]] = load i24, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i24, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT1:%.*]] = icmp eq i24 [[TMP3]], [[TMP4]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT1]]) +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load i40, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 
1), align 1 +; CHECK-NEXT: [[TMP8:%.*]] = load i40, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT3:%.*]] = icmp eq i40 [[TMP7]], [[TMP8]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT3]]) +; CHECK-NEXT: [[TMP9:%.*]] = load i48, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i48, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT4:%.*]] = icmp eq i48 [[TMP9]], [[TMP10]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT4]]) +; CHECK-NEXT: [[TMP11:%.*]] = load i56, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i56, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT5:%.*]] = icmp eq i56 [[TMP11]], [[TMP12]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT5]]) +; CHECK-NEXT: [[LHSC6:%.*]] = load i8, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[RHSC7:%.*]] = load i8, ptr [[Q0]], align 1 +; CHECK-NEXT: [[EQZ2_0_1:%.*]] = icmp eq i8 [[LHSC6]], [[RHSC7]] +; CHECK-NEXT: call void @sink(i1 [[EQZ2_0_1]]) +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT8:%.*]] = icmp eq i16 [[TMP13]], [[TMP14]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT8]]) +; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT9:%.*]] = icmp eq i24 [[TMP15]], [[TMP16]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT9]]) +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT10:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT10]]) +; CHECK-NEXT: [[TMP19:%.*]] = load i40, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), 
align 2 +; CHECK-NEXT: [[TMP20:%.*]] = load i40, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT11:%.*]] = icmp eq i40 [[TMP19]], [[TMP20]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT11]]) +; CHECK-NEXT: [[TMP21:%.*]] = load i48, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[TMP22:%.*]] = load i48, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT12:%.*]] = icmp eq i48 [[TMP21]], [[TMP22]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT12]]) +; CHECK-NEXT: [[TMP23:%.*]] = load i56, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[TMP24:%.*]] = load i56, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT13:%.*]] = icmp eq i56 [[TMP23]], [[TMP24]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT13]]) +; CHECK-NEXT: [[LHSC14:%.*]] = load i8, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[RHSC15:%.*]] = load i8, ptr [[Q0]], align 1 +; CHECK-NEXT: [[EQZ4_0_1:%.*]] = icmp eq i8 [[LHSC14]], [[RHSC15]] +; CHECK-NEXT: call void @sink(i1 [[EQZ4_0_1]]) +; CHECK-NEXT: [[TMP25:%.*]] = load i16, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[TMP26:%.*]] = load i16, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT16:%.*]] = icmp eq i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT16]]) +; CHECK-NEXT: [[TMP27:%.*]] = load i24, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = load i24, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT17:%.*]] = icmp eq i24 [[TMP27]], [[TMP28]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT17]]) +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT18:%.*]] = icmp eq i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT18]]) +; CHECK-NEXT: [[TMP31:%.*]] = load i40, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 
0, i64 4), align 4 +; CHECK-NEXT: [[TMP32:%.*]] = load i40, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT19:%.*]] = icmp eq i40 [[TMP31]], [[TMP32]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT19]]) +; CHECK-NEXT: [[TMP33:%.*]] = load i48, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[TMP34:%.*]] = load i48, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT20:%.*]] = icmp eq i48 [[TMP33]], [[TMP34]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT20]]) +; CHECK-NEXT: [[TMP35:%.*]] = load i56, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[TMP36:%.*]] = load i56, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT21:%.*]] = icmp eq i56 [[TMP35]], [[TMP36]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT21]]) +; CHECK-NEXT: [[TMP37:%.*]] = load i16, ptr @a9, align 8 +; CHECK-NEXT: [[TMP38:%.*]] = load i16, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT22:%.*]] = icmp eq i16 [[TMP37]], [[TMP38]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT22]]) +; CHECK-NEXT: [[TMP39:%.*]] = load i24, ptr @a9, align 8 +; CHECK-NEXT: [[TMP40:%.*]] = load i24, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT23:%.*]] = icmp eq i24 [[TMP39]], [[TMP40]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT23]]) +; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr @a9, align 8 +; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT24:%.*]] = icmp eq i32 [[TMP41]], [[TMP42]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT24]]) +; CHECK-NEXT: [[TMP43:%.*]] = load i40, ptr @a9, align 8 +; CHECK-NEXT: [[TMP44:%.*]] = load i40, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT25:%.*]] = icmp eq i40 [[TMP43]], [[TMP44]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT25]]) +; CHECK-NEXT: [[TMP45:%.*]] = load i48, ptr @a9, align 8 +; CHECK-NEXT: [[TMP46:%.*]] = load i48, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT26:%.*]] = icmp eq i48 [[TMP45]], [[TMP46]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT26]]) +; CHECK-NEXT: [[TMP47:%.*]] = load i56, ptr @a9, align 8 +; 
CHECK-NEXT: [[TMP48:%.*]] = load i56, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT27:%.*]] = icmp eq i56 [[TMP47]], [[TMP48]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT27]]) +; CHECK-NEXT: [[TMP49:%.*]] = load i64, ptr @a9, align 8 +; CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr [[Q0]], align 1 +; CHECK-NEXT: [[DOTNOT28:%.*]] = icmp eq i64 [[TMP49]], [[TMP50]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT28]]) +; CHECK-NEXT: ret void +; +; Exercise memcmp() with two byte aligned blocks. + %p1 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 1 + +; Fold memcmp(a + 1, q, 1) == 0 to a[1] == *q. + %c1_0_1 = call i32 @memcmp(ptr %p1, ptr %q0, i64 1) + %eqz1_0_1 = icmp eq i32 %c1_0_1, 0 + call void @sink(i1 %eqz1_0_1) + +; Exercise memcmp(a + 1, q, 2) == 0 with both unaligned addresses. + %c1_0_2 = call i32 @memcmp(ptr %p1, ptr %q0, i64 2) + %eqz1_0_2 = icmp eq i32 %c1_0_2, 0 + call void @sink(i1 %eqz1_0_2) + +; Exercise memcmp(a + 1, q, 3) == 0 with both unaligned addresses. + %c1_0_3 = call i32 @memcmp(ptr %p1, ptr %q0, i64 3) + %eqz1_0_3 = icmp eq i32 %c1_0_3, 0 + call void @sink(i1 %eqz1_0_3) + + %c1_0_4 = call i32 @memcmp(ptr %p1, ptr %q0, i64 4) + %eqz1_0_4 = icmp eq i32 %c1_0_4, 0 + call void @sink(i1 %eqz1_0_4) + + %c1_0_5 = call i32 @memcmp(ptr %p1, ptr %q0, i64 5) + %eqz1_0_5 = icmp eq i32 %c1_0_5, 0 + call void @sink(i1 %eqz1_0_5) + + %c1_0_6 = call i32 @memcmp(ptr %p1, ptr %q0, i64 6) + %eqz1_0_6 = icmp eq i32 %c1_0_6, 0 + call void @sink(i1 %eqz1_0_6) + + %c1_0_7 = call i32 @memcmp(ptr %p1, ptr %q0, i64 7) + %eqz1_0_7 = icmp eq i32 %c1_0_7, 0 + call void @sink(i1 %eqz1_0_7) + + +; Exercise memcmp() with a word-aligned and a byte-aligned block. 
+ %p2 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 2 + + %c2_0_1 = call i32 @memcmp(ptr %p2, ptr %q0, i64 1) + %eqz2_0_1 = icmp eq i32 %c2_0_1, 0 + call void @sink(i1 %eqz2_0_1) + + %c2_0_2 = call i32 @memcmp(ptr %p2, ptr %q0, i64 2) + %eqz2_0_2 = icmp eq i32 %c2_0_2, 0 + call void @sink(i1 %eqz2_0_2) + + %c2_0_3 = call i32 @memcmp(ptr %p2, ptr %q0, i64 3) + %eqz2_0_3 = icmp eq i32 %c2_0_3, 0 + call void @sink(i1 %eqz2_0_3) + + %c2_0_4 = call i32 @memcmp(ptr %p2, ptr %q0, i64 4) + %eqz2_0_4 = icmp eq i32 %c2_0_4, 0 + call void @sink(i1 %eqz2_0_4) + + %c2_0_5 = call i32 @memcmp(ptr %p2, ptr %q0, i64 5) + %eqz2_0_5 = icmp eq i32 %c2_0_5, 0 + call void @sink(i1 %eqz2_0_5) + + %c2_0_6 = call i32 @memcmp(ptr %p2, ptr %q0, i64 6) + %eqz2_0_6 = icmp eq i32 %c2_0_6, 0 + call void @sink(i1 %eqz2_0_6) + + %c2_0_7 = call i32 @memcmp(ptr %p2, ptr %q0, i64 7) + %eqz2_0_7 = icmp eq i32 %c2_0_7, 0 + call void @sink(i1 %eqz2_0_7) + + +; Exercise memcmp() with a double-word-aligned and a byte-aligned block. 
+ %p4 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 4 + + %c4_0_1 = call i32 @memcmp(ptr %p4, ptr %q0, i64 1) + %eqz4_0_1 = icmp eq i32 %c4_0_1, 0 + call void @sink(i1 %eqz4_0_1) + + %c4_0_2 = call i32 @memcmp(ptr %p4, ptr %q0, i64 2) + %eqz4_0_2 = icmp eq i32 %c4_0_2, 0 + call void @sink(i1 %eqz4_0_2) + + %c4_0_3 = call i32 @memcmp(ptr %p4, ptr %q0, i64 3) + %eqz4_0_3 = icmp eq i32 %c4_0_3, 0 + call void @sink(i1 %eqz4_0_3) + + %c4_0_4 = call i32 @memcmp(ptr %p4, ptr %q0, i64 4) + %eqz4_0_4 = icmp eq i32 %c4_0_4, 0 + call void @sink(i1 %eqz4_0_4) + + %c4_0_5 = call i32 @memcmp(ptr %p4, ptr %q0, i64 5) + %eqz4_0_5 = icmp eq i32 %c4_0_5, 0 + call void @sink(i1 %eqz4_0_5) + + %c4_0_6 = call i32 @memcmp(ptr %p4, ptr %q0, i64 6) + %eqz4_0_6 = icmp eq i32 %c4_0_6, 0 + call void @sink(i1 %eqz4_0_6) + + %c4_0_7 = call i32 @memcmp(ptr %p4, ptr %q0, i64 7) + %eqz4_0_7 = icmp eq i32 %c4_0_7, 0 + call void @sink(i1 %eqz4_0_7) + + +; Exercise memcmp() with a quad-word-aligned and a byte-aligned block. 
+ %p8 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 0 + + %c8_0_2 = call i32 @memcmp(ptr %p8, ptr %q0, i64 2) + %eqz8_0_2 = icmp eq i32 %c8_0_2, 0 + call void @sink(i1 %eqz8_0_2) + + %c8_0_3 = call i32 @memcmp(ptr %p8, ptr %q0, i64 3) + %eqz8_0_3 = icmp eq i32 %c8_0_3, 0 + call void @sink(i1 %eqz8_0_3) + + %c8_0_4 = call i32 @memcmp(ptr %p8, ptr %q0, i64 4) + %eqz8_0_4 = icmp eq i32 %c8_0_4, 0 + call void @sink(i1 %eqz8_0_4) + + %c8_0_5 = call i32 @memcmp(ptr %p8, ptr %q0, i64 5) + %eqz8_0_5 = icmp eq i32 %c8_0_5, 0 + call void @sink(i1 %eqz8_0_5) + + %c8_0_6 = call i32 @memcmp(ptr %p8, ptr %q0, i64 6) + %eqz8_0_6 = icmp eq i32 %c8_0_6, 0 + call void @sink(i1 %eqz8_0_6) + + %c8_0_7 = call i32 @memcmp(ptr %p8, ptr %q0, i64 7) + %eqz8_0_7 = icmp eq i32 %c8_0_7, 0 + call void @sink(i1 %eqz8_0_7) + + %c8_0_8 = call i32 @memcmp(ptr %p8, ptr %q0, i64 8) + %eqz8_0_8 = icmp eq i32 %c8_0_8, 0 + call void @sink(i1 %eqz8_0_8) + + ret void +} + + +define void @fold_memcmp_p_a(ptr %p0) { +; CHECK-LABEL: @fold_memcmp_p_a( +; CHECK-NEXT: [[LHSC:%.*]] = load i8, ptr [[P0:%.*]], align 1 +; CHECK-NEXT: [[RHSC:%.*]] = load i8, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[EQZ0_1_1:%.*]] = icmp eq i8 [[LHSC]], [[RHSC]] +; CHECK-NEXT: call void @sink(i1 [[EQZ0_1_1]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT]]) +; CHECK-NEXT: [[TMP3:%.*]] = load i24, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i24, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[DOTNOT1:%.*]] = icmp eq i24 [[TMP3]], [[TMP4]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT1]]) +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr 
inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load i40, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = load i40, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[DOTNOT3:%.*]] = icmp eq i40 [[TMP7]], [[TMP8]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT3]]) +; CHECK-NEXT: [[TMP9:%.*]] = load i48, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i48, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[DOTNOT4:%.*]] = icmp eq i48 [[TMP9]], [[TMP10]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT4]]) +; CHECK-NEXT: [[TMP11:%.*]] = load i56, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i56, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 1), align 1 +; CHECK-NEXT: [[DOTNOT5:%.*]] = icmp eq i56 [[TMP11]], [[TMP12]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT5]]) +; CHECK-NEXT: [[LHSC6:%.*]] = load i8, ptr [[P0]], align 1 +; CHECK-NEXT: [[RHSC7:%.*]] = load i8, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[EQZ0_2_1:%.*]] = icmp eq i8 [[LHSC6]], [[RHSC7]] +; CHECK-NEXT: call void @sink(i1 [[EQZ0_2_1]]) +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[DOTNOT8:%.*]] = icmp eq i16 [[TMP13]], [[TMP14]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT8]]) +; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[DOTNOT9:%.*]] = icmp eq i24 [[TMP15]], [[TMP16]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT9]]) +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([9 
x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[DOTNOT10:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT10]]) +; CHECK-NEXT: [[TMP19:%.*]] = load i40, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = load i40, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[DOTNOT11:%.*]] = icmp eq i40 [[TMP19]], [[TMP20]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT11]]) +; CHECK-NEXT: [[TMP21:%.*]] = load i48, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = load i48, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[DOTNOT12:%.*]] = icmp eq i48 [[TMP21]], [[TMP22]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT12]]) +; CHECK-NEXT: [[TMP23:%.*]] = load i56, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP24:%.*]] = load i56, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 2), align 2 +; CHECK-NEXT: [[DOTNOT13:%.*]] = icmp eq i56 [[TMP23]], [[TMP24]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT13]]) +; CHECK-NEXT: [[LHSC14:%.*]] = load i8, ptr [[P0]], align 1 +; CHECK-NEXT: [[RHSC15:%.*]] = load i8, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[EQZ0_4_1:%.*]] = icmp eq i8 [[LHSC14]], [[RHSC15]] +; CHECK-NEXT: call void @sink(i1 [[EQZ0_4_1]]) +; CHECK-NEXT: [[TMP25:%.*]] = load i16, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP26:%.*]] = load i16, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[DOTNOT16:%.*]] = icmp eq i16 [[TMP25]], [[TMP26]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT16]]) +; CHECK-NEXT: [[TMP27:%.*]] = load i24, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = load i24, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[DOTNOT17:%.*]] = icmp eq i24 [[TMP27]], [[TMP28]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT17]]) +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr getelementptr 
inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[DOTNOT18:%.*]] = icmp eq i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT18]]) +; CHECK-NEXT: [[TMP31:%.*]] = load i40, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP32:%.*]] = load i40, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[DOTNOT19:%.*]] = icmp eq i40 [[TMP31]], [[TMP32]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT19]]) +; CHECK-NEXT: [[TMP33:%.*]] = load i48, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = load i48, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[DOTNOT20:%.*]] = icmp eq i48 [[TMP33]], [[TMP34]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT20]]) +; CHECK-NEXT: [[TMP35:%.*]] = load i56, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP36:%.*]] = load i56, ptr getelementptr inbounds ([9 x i8], ptr @a9, i64 0, i64 4), align 4 +; CHECK-NEXT: [[DOTNOT21:%.*]] = icmp eq i56 [[TMP35]], [[TMP36]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT21]]) +; CHECK-NEXT: [[TMP37:%.*]] = load i16, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = load i16, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT22:%.*]] = icmp eq i16 [[TMP37]], [[TMP38]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT22]]) +; CHECK-NEXT: [[TMP39:%.*]] = load i24, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP40:%.*]] = load i24, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT23:%.*]] = icmp eq i24 [[TMP39]], [[TMP40]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT23]]) +; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT24:%.*]] = icmp eq i32 [[TMP41]], [[TMP42]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT24]]) +; CHECK-NEXT: [[TMP43:%.*]] = load i40, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP44:%.*]] = load i40, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT25:%.*]] = icmp eq i40 [[TMP43]], [[TMP44]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT25]]) +; CHECK-NEXT: [[TMP45:%.*]] = load 
i48, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i48, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT26:%.*]] = icmp eq i48 [[TMP45]], [[TMP46]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT26]]) +; CHECK-NEXT: [[TMP47:%.*]] = load i56, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = load i56, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT27:%.*]] = icmp eq i56 [[TMP47]], [[TMP48]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT27]]) +; CHECK-NEXT: [[TMP49:%.*]] = load i64, ptr [[P0]], align 1 +; CHECK-NEXT: [[TMP50:%.*]] = load i64, ptr @a9, align 8 +; CHECK-NEXT: [[DOTNOT28:%.*]] = icmp eq i64 [[TMP49]], [[TMP50]] +; CHECK-NEXT: call void @sink(i1 [[DOTNOT28]]) +; CHECK-NEXT: ret void +; +; Exercise memcmp() with two byte aligned blocks. + %q1 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 1 + + %c0_1_1 = call i32 @memcmp(ptr %p0, ptr %q1, i64 1) + %eqz0_1_1 = icmp eq i32 %c0_1_1, 0 + call void @sink(i1 %eqz0_1_1) + + %c1_0_2 = call i32 @memcmp(ptr %p0, ptr %q1, i64 2) + %eqz1_0_2 = icmp eq i32 %c1_0_2, 0 + call void @sink(i1 %eqz1_0_2) + + %c1_0_3 = call i32 @memcmp(ptr %p0, ptr %q1, i64 3) + %eqz1_0_3 = icmp eq i32 %c1_0_3, 0 + call void @sink(i1 %eqz1_0_3) + + %c1_0_4 = call i32 @memcmp(ptr %p0, ptr %q1, i64 4) + %eqz1_0_4 = icmp eq i32 %c1_0_4, 0 + call void @sink(i1 %eqz1_0_4) + + %c1_0_5 = call i32 @memcmp(ptr %p0, ptr %q1, i64 5) + %eqz1_0_5 = icmp eq i32 %c1_0_5, 0 + call void @sink(i1 %eqz1_0_5) + + %c1_0_6 = call i32 @memcmp(ptr %p0, ptr %q1, i64 6) + %eqz1_0_6 = icmp eq i32 %c1_0_6, 0 + call void @sink(i1 %eqz1_0_6) + + %c1_0_7 = call i32 @memcmp(ptr %p0, ptr %q1, i64 7) + %eqz1_0_7 = icmp eq i32 %c1_0_7, 0 + call void @sink(i1 %eqz1_0_7) + + +; Exercise memcmp() with a word-aligned and a byte-aligned block. 
+ %q2 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 2 + + %c0_2_1 = call i32 @memcmp(ptr %p0, ptr %q2, i64 1) + %eqz0_2_1 = icmp eq i32 %c0_2_1, 0 + call void @sink(i1 %eqz0_2_1) + + %c2_0_2 = call i32 @memcmp(ptr %p0, ptr %q2, i64 2) + %eqz2_0_2 = icmp eq i32 %c2_0_2, 0 + call void @sink(i1 %eqz2_0_2) + + %c2_0_3 = call i32 @memcmp(ptr %p0, ptr %q2, i64 3) + %eqz2_0_3 = icmp eq i32 %c2_0_3, 0 + call void @sink(i1 %eqz2_0_3) + + %c2_0_4 = call i32 @memcmp(ptr %p0, ptr %q2, i64 4) + %eqz2_0_4 = icmp eq i32 %c2_0_4, 0 + call void @sink(i1 %eqz2_0_4) + + %c2_0_5 = call i32 @memcmp(ptr %p0, ptr %q2, i64 5) + %eqz2_0_5 = icmp eq i32 %c2_0_5, 0 + call void @sink(i1 %eqz2_0_5) + + %c2_0_6 = call i32 @memcmp(ptr %p0, ptr %q2, i64 6) + %eqz2_0_6 = icmp eq i32 %c2_0_6, 0 + call void @sink(i1 %eqz2_0_6) + + %c2_0_7 = call i32 @memcmp(ptr %p0, ptr %q2, i64 7) + %eqz2_0_7 = icmp eq i32 %c2_0_7, 0 + call void @sink(i1 %eqz2_0_7) + + +; Exercise memcmp() with a double-word-aligned and a byte-aligned block. 
+ %q4 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 4 + + %c0_4_1 = call i32 @memcmp(ptr %p0, ptr %q4, i64 1) + %eqz0_4_1 = icmp eq i32 %c0_4_1, 0 + call void @sink(i1 %eqz0_4_1) + + %c4_0_2 = call i32 @memcmp(ptr %p0, ptr %q4, i64 2) + %eqz4_0_2 = icmp eq i32 %c4_0_2, 0 + call void @sink(i1 %eqz4_0_2) + + %c4_0_3 = call i32 @memcmp(ptr %p0, ptr %q4, i64 3) + %eqz4_0_3 = icmp eq i32 %c4_0_3, 0 + call void @sink(i1 %eqz4_0_3) + + %c4_0_4 = call i32 @memcmp(ptr %p0, ptr %q4, i64 4) + %eqz4_0_4 = icmp eq i32 %c4_0_4, 0 + call void @sink(i1 %eqz4_0_4) + + %c4_0_5 = call i32 @memcmp(ptr %p0, ptr %q4, i64 5) + %eqz4_0_5 = icmp eq i32 %c4_0_5, 0 + call void @sink(i1 %eqz4_0_5) + + %c4_0_6 = call i32 @memcmp(ptr %p0, ptr %q4, i64 6) + %eqz4_0_6 = icmp eq i32 %c4_0_6, 0 + call void @sink(i1 %eqz4_0_6) + + %c4_0_7 = call i32 @memcmp(ptr %p0, ptr %q4, i64 7) + %eqz4_0_7 = icmp eq i32 %c4_0_7, 0 + call void @sink(i1 %eqz4_0_7) + + +; Exercise memcmp() with a quad-word-aligned and a byte-aligned block. 
+ %q8 = getelementptr [9 x i8], [9 x i8]* @a9, i32 0, i32 0 + + %c8_0_2 = call i32 @memcmp(ptr %p0, ptr %q8, i64 2) + %eqz8_0_2 = icmp eq i32 %c8_0_2, 0 + call void @sink(i1 %eqz8_0_2) + + %c8_0_3 = call i32 @memcmp(ptr %p0, ptr %q8, i64 3) + %eqz8_0_3 = icmp eq i32 %c8_0_3, 0 + call void @sink(i1 %eqz8_0_3) + + %c8_0_4 = call i32 @memcmp(ptr %p0, ptr %q8, i64 4) + %eqz8_0_4 = icmp eq i32 %c8_0_4, 0 + call void @sink(i1 %eqz8_0_4) + + %c8_0_5 = call i32 @memcmp(ptr %p0, ptr %q8, i64 5) + %eqz8_0_5 = icmp eq i32 %c8_0_5, 0 + call void @sink(i1 %eqz8_0_5) + + %c8_0_6 = call i32 @memcmp(ptr %p0, ptr %q8, i64 6) + %eqz8_0_6 = icmp eq i32 %c8_0_6, 0 + call void @sink(i1 %eqz8_0_6) + + %c8_0_7 = call i32 @memcmp(ptr %p0, ptr %q8, i64 7) + %eqz8_0_7 = icmp eq i32 %c8_0_7, 0 + call void @sink(i1 %eqz8_0_7) + + %c8_0_8 = call i32 @memcmp(ptr %p0, ptr %q8, i64 8) + %eqz8_0_8 = icmp eq i32 %c8_0_8, 0 + call void @sink(i1 %eqz8_0_8) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll b/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll --- a/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll +++ b/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll @@ -11,14 +11,14 @@ define i1 @memcmp_4bytes_unaligned_constant_i8(i8* align 4 %x) { ; LE-LABEL: @memcmp_4bytes_unaligned_constant_i8( ; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* -; LE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4 -; LE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[LHSV]], 16777216 +; LE-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; LE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 16777216 ; LE-NEXT: ret i1 [[DOTNOT]] ; ; BE-LABEL: @memcmp_4bytes_unaligned_constant_i8( ; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* -; BE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4 -; BE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[LHSV]], 1 +; BE-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; BE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 1 
; BE-NEXT: ret i1 [[DOTNOT]] ; %call = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4) @@ -34,14 +34,14 @@ define i1 @memcmp_4bytes_unaligned_constant_i16(i8* align 4 %x) { ; LE-LABEL: @memcmp_4bytes_unaligned_constant_i16( ; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* -; LE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4 -; LE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[RHSV]], 131073 +; LE-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; LE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 131073 ; LE-NEXT: ret i1 [[DOTNOT]] ; ; BE-LABEL: @memcmp_4bytes_unaligned_constant_i16( ; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* -; BE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4 -; BE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[RHSV]], 65538 +; BE-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; BE-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 65538 ; BE-NEXT: ret i1 [[DOTNOT]] ; %call = tail call i32 @memcmp(i8* bitcast (i16* getelementptr inbounds ([4 x i16], [4 x i16]* @intbuf_unaligned, i64 0, i64 0) to i8*), i8* %x, i64 4) @@ -70,9 +70,12 @@ define i1 @memcmp_4bytes_one_unaligned_i8(i8* align 4 %x, i8* align 1 %y) { ; ALL-LABEL: @memcmp_4bytes_one_unaligned_i8( -; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[X:%.*]], i8* noundef nonnull dereferenceable(4) [[Y:%.*]], i64 4) -; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0 -; ALL-NEXT: ret i1 [[CMPEQ0]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 1 +; ALL-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[TMP4]] +; ALL-NEXT: ret i1 [[DOTNOT]] ; %bc = bitcast i8* %x to i32* %lhsv = load i32, i32* %bc diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll 
b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll --- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll @@ -81,10 +81,12 @@ define i1 @PR2341(i8** %start_addr) { ; CHECK32-LABEL: @PR2341( ; CHECK32-NEXT: entry: -; CHECK32-NEXT: [[TMP4:%.*]] = load i8*, i8** [[START_ADDR:%.*]], align 4 -; CHECK32-NEXT: [[TMP5:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[TMP4]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([5 x i8], [5 x i8]* @_2E_str, i32 0, i32 0), i32 4) #[[ATTR0:[0-9]+]] -; CHECK32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 -; CHECK32-NEXT: ret i1 [[TMP6]] +; CHECK32-NEXT: [[TMP0:%.*]] = bitcast i8** [[START_ADDR:%.*]] to i32** +; CHECK32-NEXT: [[TMP41:%.*]] = load i32*, i32** [[TMP0]], align 4 +; CHECK32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP41]], align 1 +; CHECK32-NEXT: [[TMP2:%.*]] = load i32, i32* bitcast ([5 x i8]* @_2E_str to i32*), align 1 +; CHECK32-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]] +; CHECK32-NEXT: ret i1 [[DOTNOT]] ; ; CHECK16-LABEL: @PR2341( ; CHECK16-NEXT: entry: @@ -218,7 +220,7 @@ ; CHECK32-NEXT: ret void ; ; CHECK16-LABEL: @test9( -; CHECK16-NEXT: [[Y:%.*]] = call i32 @strcmp(i8* [[X:%.*]], i8* [[X]]) #[[ATTR5:[0-9]+]] +; CHECK16-NEXT: [[Y:%.*]] = call i32 @strcmp(i8* [[X:%.*]], i8* [[X]]) #[[ATTR6:[0-9]+]] ; CHECK16-NEXT: ret void ; %y = call i32 @strcmp(i8* %x, i8* %x) #1 diff --git a/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll b/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll --- a/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll +++ b/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll @@ -10,12 +10,15 @@ define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) 
[[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; +; A strcmp call with a short constant string whose result is used in +; an equality test with zero is transformed into one to memcmp which +; is then turned into an unaligned load and compare. %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0)) %cmp = icmp eq i32 %call, 0 @@ -27,10 +30,10 @@ define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp2( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -42,10 +45,10 @@ define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp3( -; CHECK-NEXT: [[STRING:%.*]] 
= getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -57,10 +60,10 @@ define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp4( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -72,10 +75,10 @@ define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp5( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [5 x i8], [5 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef 
nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [5 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [5 x i8], [5 x i8]* %buf, i64 0, i64 0 @@ -116,10 +119,10 @@ define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp8( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [4 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i64 0, i64 0 @@ -131,10 +134,10 @@ define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strcmp_memcmp9( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] 
= zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 6513249 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -147,10 +150,10 @@ define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(2) [[STRING]], i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i16* +; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP2]], 25963 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -164,10 +167,10 @@ define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp2( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: 
[[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -179,10 +182,10 @@ define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp3( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -194,10 +197,10 @@ define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp4( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds 
[12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -209,10 +212,10 @@ define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp5( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -225,10 +228,10 @@ define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp6( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -240,10 +243,10 @@ define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: 
@strncmp_memcmp7( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -255,10 +258,10 @@ define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp8( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(3) [[STRING]], i8* noundef nonnull dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 3) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i24* +; CHECK-NEXT: [[TMP2:%.*]] = load i24, i24* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i24 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -299,10 +302,10 @@ define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp11( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 
@memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -314,10 +317,10 @@ define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp12( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* noundef nonnull dereferenceable(4) [[STRING]], i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7955819 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -329,10 +332,10 @@ define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp13( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(2) [[STRING]], i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), 
i64 2) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i16* +; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[TMP2]], 25185 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0 @@ -344,10 +347,10 @@ define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) nofree nosync { ; CHECK-LABEL: @strncmp_memcmp14( -; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(4) [[STRING]], i8* noundef nonnull dereferenceable(4) getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [12 x i8]* [[BUF:%.*]] to i32* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 6513249 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[DOTNOT]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] ; %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0