diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11365,16 +11365,37 @@
       }
   }
 
+  // Finds the pattern implementing the zero extension in-reg of
+  // illegal values, which is rendered as an and with a bit mask. For
+  // example, the node zero extending the load of an i8 value into an
+  // i32 value is rendered as:
+  //
+  // i32 = (and (load i8) 0xff)
+  auto IsZeroExtInReg = [this](SDNode *User) -> bool {
+    if (User->getOpcode() != ISD::AND)
+      return false;
+
+    auto *AndC = dyn_cast<ConstantSDNode>(User->getOperand(1));
+    auto *LoadN = dyn_cast<LoadSDNode>(User->getOperand(0));
+    if (!AndC || !LoadN)
+      return false;
+
+    EVT LoadResultTy = LoadN->getMemoryVT();
+    EVT ExtVT;
+
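+    // isAndLoadExtLoad is the existing (and (load x), mask) matcher:
+    // it succeeds when the mask covers only the loaded bits, i.e.
+    // when the and acts as a zero extension in-reg of the load.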
+    return isAndLoadExtLoad(AndC, LoadN, LoadResultTy, ExtVT);
+  };
+
   // fold (sext_inreg (extload x)) -> (sextload x)
   // If sextload is not supported by target, we can only do the combine when
   // load has one use. Doing otherwise can block folding the extload with other
   // extends that the target does support.
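+  // In addition, skip the fold when another user of the load zero
+  // extends it in-reg (see IsZeroExtInReg above). For example, given
+  // roughly:
+  //
+  //   t1: i32,ch = load<(load 1), anyext from i8> ...
+  //   t2: i32 = and t1, Constant:i32<255>
+  //   t3: i32 = sign_extend_inreg t1, ValueType:i8
+  //
+  // turning t1 into a sextload to fold t3 would force t2 to mask a
+  // sign-extended value instead of reusing the narrow load as-is.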
-  if (ISD::isEXTLoad(N0.getNode()) &&
-      ISD::isUNINDEXEDLoad(N0.getNode()) &&
+  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
         N0.hasOneUse()) ||
-       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) &&
+      !llvm::any_of(N0->uses(), IsZeroExtInReg)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                      LN0->getChain(),
diff --git a/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll b/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll
@@ -0,0 +1,107 @@
+; RUN: llc -mtriple aarch64-linux-gnu -o - -asm-verbose=0 < %s | FileCheck %s
+
+; The purpose of the `f_*` and `g_*` tests is to make sure that the
+; zero extension of the load caused by the `zext` instruction is
+; preferred over the sign extension caused by the signed comparison
+; "greater than -1". The effect of prioritizing the zero extension is
+; to avoid generating a sign extension of the data being loaded. This
+; is done by making sure that the sign bit of the original unextended
+; data is checked instead of the sign bit of the sign extended value.
+;
+; The `f_*` and `g_*` tests differ slightly in their structure to make
+; sure that all the cases that compute the position of the sign bit in
+; AArch64ISelLowering.cpp (LowerBR_CC) are covered.
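+;
+; With the zero extension preferred, the value is loaded with an
+; unsigned load (ldrb/ldrh) and the branch tests the sign bit of the
+; original data directly (tbnz on bit 7 or 15), instead of sign
+; extending the loaded value and comparing it against -1.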
+
+define i32 @f_i32_i8(i8* %p) nounwind {
+; CHECK-LABEL: f_i32_i8:
+; CHECK-NEXT:          ldrb   w[[N:[0-9]+]], [x0]
+; CHECK-NEXT:          tbnz    w[[N]], #7, .LBB[[BB:.*]]
+; CHECK-NEXT:          add    w0, w[[N]], w[[N]]
+; CHECK-NEXT:          ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT:          mul    w0, w[[N]], w[[N]]
+; CHECK-NEXT:          ret
+entry:
+  %0 = load i8, i8* %p
+  %conv = zext i8 %0 to i32
+  %cmp = icmp sgt i8 %0, -1
+  br i1 %cmp, label %A, label %B
+
+A:
+  %retval2 = add i32 %conv, %conv
+  ret i32 %retval2
+
+B:
+  %retval1 = mul i32 %conv, %conv
+  ret i32 %retval1
+}
+
+define i32 @f_i32_i16(i16* %p) nounwind {
+; CHECK-LABEL: f_i32_i16:
+; CHECK-NEXT:          ldrh   w[[N:[0-9]+]], [x0]
+; CHECK-NEXT:          tbnz    w[[N]], #15, .LBB[[BB:.*]]
+; CHECK-NEXT:          add    w0, w[[N]], w[[N]]
+; CHECK-NEXT:          ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT:          mul    w0, w[[N]], w[[N]]
+; CHECK-NEXT:          ret
+entry:
+  %0 = load i16, i16* %p
+  %conv = zext i16 %0 to i32
+  %cmp = icmp sgt i16 %0, -1
+  br i1 %cmp, label %A, label %B
+
+A:
+  %retval2 = add i32 %conv, %conv
+  ret i32 %retval2
+
+B:
+  %retval1 = mul i32 %conv, %conv
+  ret i32 %retval1
+}
+
+define i32 @g_i32_i8(i8* %p) nounwind {
+; CHECK-LABEL: g_i32_i8:
+; CHECK-NEXT:          ldrb    w0, [x0]
+; CHECK-NEXT:          tbnz    w0, #7, .LBB[[BB:.*]]
+; CHECK-NEXT:          ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT:          lsl   w0, w0, #1
+; CHECK-NEXT:          ret
+entry:
+  %0 = load i8, i8* %p, align 1
+  %conv = zext i8 %0 to i32
+  %cmp1 = icmp sgt i8 %0, -1
+  br i1 %cmp1, label %return, label %B
+
+B:                                                ; preds = %entry
+  %add = shl nuw nsw i32 %conv, 1
+  ret i32 %add
+
+return:                                           ; preds = %entry
+  ret i32 %conv
+}
+
+define i32 @g_i32_i16(i16* %p) nounwind {
+; CHECK-LABEL: g_i32_i16:
+; CHECK-NEXT:          ldrh    w0, [x0]
+; CHECK-NEXT:          tbnz    w0, #15, .LBB[[BB:.*]]
+; CHECK-NEXT:          ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT:          lsl   w0, w0, #1
+; CHECK-NEXT:          ret
+entry:
+  %0 = load i16, i16* %p, align 1
+  %conv = zext i16 %0 to i32
+  %cmp1 = icmp sgt i16 %0, -1
+  br i1 %cmp1, label %return, label %B
+
+B:                                                ; preds = %entry
+  %add = shl nuw nsw i32 %conv, 1
+  ret i32 %add
+
+return:                                           ; preds = %entry
+  ret i32 %conv
+}
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -82,19 +82,19 @@
 ; ENABLE-NEXT:    bhi .LBB0_7
 ; ENABLE-NEXT:  @ %bb.14: @ %while.body24.preheader
 ; ENABLE-NEXT:    @ in Loop: Header=BB0_7 Depth=1
-; ENABLE-NEXT:    sub r3, r3, #2
+; ENABLE-NEXT:    sub lr, r3, #2
 ; ENABLE-NEXT:  .LBB0_15: @ %while.body24
 ; ENABLE-NEXT:    @ Parent Loop BB0_7 Depth=1
 ; ENABLE-NEXT:    @ => This Inner Loop Header: Depth=2
-; ENABLE-NEXT:    mov r0, r3
-; ENABLE-NEXT:    cmp r3, r2
+; ENABLE-NEXT:    mov r0, lr
+; ENABLE-NEXT:    cmp lr, r2
 ; ENABLE-NEXT:    bls .LBB0_7
 ; ENABLE-NEXT:  @ %bb.16: @ %while.body24.land.rhs14_crit_edge
 ; ENABLE-NEXT:    @ in Loop: Header=BB0_15 Depth=2
-; ENABLE-NEXT:    mov r3, r0
-; ENABLE-NEXT:    ldrsb lr, [r3], #-1
-; ENABLE-NEXT:    cmn lr, #1
-; ENABLE-NEXT:    uxtb r12, lr
+; ENABLE-NEXT:    mov lr, r0
+; ENABLE-NEXT:    ldrb r12, [lr], #-1
+; ENABLE-NEXT:    sxtb r3, r12
+; ENABLE-NEXT:    cmn r3, #1
 ; ENABLE-NEXT:    bgt .LBB0_7
 ; ENABLE-NEXT:  @ %bb.17: @ %while.body24.land.rhs14_crit_edge
 ; ENABLE-NEXT:    @ in Loop: Header=BB0_15 Depth=2
@@ -172,19 +172,19 @@
 ; DISABLE-NEXT:    bhi .LBB0_7
 ; DISABLE-NEXT:  @ %bb.14: @ %while.body24.preheader
 ; DISABLE-NEXT:    @ in Loop: Header=BB0_7 Depth=1
-; DISABLE-NEXT:    sub r3, r3, #2
+; DISABLE-NEXT:    sub lr, r3, #2
 ; DISABLE-NEXT:  .LBB0_15: @ %while.body24
 ; DISABLE-NEXT:    @ Parent Loop BB0_7 Depth=1
 ; DISABLE-NEXT:    @ => This Inner Loop Header: Depth=2
-; DISABLE-NEXT:    mov r0, r3
-; DISABLE-NEXT:    cmp r3, r2
+; DISABLE-NEXT:    mov r0, lr
+; DISABLE-NEXT:    cmp lr, r2
 ; DISABLE-NEXT:    bls .LBB0_7
 ; DISABLE-NEXT:  @ %bb.16: @ %while.body24.land.rhs14_crit_edge
 ; DISABLE-NEXT:    @ in Loop: Header=BB0_15 Depth=2
-; DISABLE-NEXT:    mov r3, r0
-; DISABLE-NEXT:    ldrsb lr, [r3], #-1
-; DISABLE-NEXT:    cmn lr, #1
-; DISABLE-NEXT:    uxtb r12, lr
+; DISABLE-NEXT:    mov lr, r0
+; DISABLE-NEXT:    ldrb r12, [lr], #-1
+; DISABLE-NEXT:    sxtb r3, r12
+; DISABLE-NEXT:    cmn r3, #1
 ; DISABLE-NEXT:    bgt .LBB0_7
 ; DISABLE-NEXT:  @ %bb.17: @ %while.body24.land.rhs14_crit_edge
 ; DISABLE-NEXT:    @ in Loop: Header=BB0_15 Depth=2
diff --git a/llvm/test/CodeGen/ARM/select-imm.ll b/llvm/test/CodeGen/ARM/select-imm.ll
--- a/llvm/test/CodeGen/ARM/select-imm.ll
+++ b/llvm/test/CodeGen/ARM/select-imm.ll
@@ -218,38 +218,65 @@
 ; ARM scheduler emits icmp/zext before both calls, so isn't relevant
 
 ; ARMT2-LABEL: t9:
-; ARMT2: bl f
-; ARMT2: uxtb r0, r4
-; ARMT2: cmp  r0, r0
-; ARMT2: add  r1, r4, #1
-; ARMT2: mov  r2, r0
-; ARMT2: add  r2, r2, #1
-; ARMT2: add  r1, r1, #1
-; ARMT2: uxtb r3, r2
-; ARMT2: cmp  r3, r0
+; ARMT2:       .save   {r4, lr}
+; ARMT2:       push    {r4, lr}
+; ARMT2:       ldrb    r4, [r0]
+; ARMT2:       mov     r0, #1
+; ARMT2:       bl      f
+; ARMT2:       cmp     r4, r4
+; ARMT2:       popne   {r4, pc}
+; ARMT2:    .LBB8_1:
+; ARMT2:       sxtb    r0, r4
+; ARMT2:       add     r0, r0, #1
+; ARMT2:       mov     r1, r4
+; ARMT2:    .LBB8_2:
+; ARMT2:       add     r1, r1, #1
+; ARMT2:       add     r0, r0, #1
+; ARMT2:       uxtb    r2, r1
+; ARMT2:       cmp     r2, r4
+; ARMT2:       blt     .LBB8_2
+; ARMT2:       pop     {r4, pc}
 
 ; THUMB1-LABEL: t9:
-; THUMB1: bl f
-; THUMB1: sxtb r1, r4
-; THUMB1: uxtb r0, r1
-; THUMB1: cmp  r0, r0
-; THUMB1: adds r1, r1, #1
-; THUMB1: mov  r2, r0
-; THUMB1: adds r1, r1, #1
-; THUMB1: adds r2, r2, #1
-; THUMB1: uxtb r3, r2
-; THUMB1: cmp  r3, r0
+; THUMB1:          .save   {r4, lr}
+; THUMB1:          push    {r4, lr}
+; THUMB1:          ldrb    r4, [r0]
+; THUMB1:          movs    r0, #1
+; THUMB1:          bl      f
+; THUMB1:          cmp     r4, r4
+; THUMB1:          bne     .LBB8_3
+; THUMB1:          sxtb    r0, r4
+; THUMB1:          adds    r0, r0, #1
+; THUMB1:          mov     r1, r4
+; THUMB1:  .LBB8_2:
+; THUMB1:          adds    r0, r0, #1
+; THUMB1:          adds    r1, r1, #1
+; THUMB1:          uxtb    r2, r1
+; THUMB1:          cmp     r2, r4
+; THUMB1:          blt     .LBB8_2
+; THUMB1:  .LBB8_3:
+; THUMB1:          pop     {r4, pc}
 
 ; THUMB2-LABEL: t9:
-; THUMB2: bl f
-; THUMB2: uxtb r0, r4
-; THUMB2: cmp  r0, r0
-; THUMB2: adds r1, r4, #1
-; THUMB2: mov  r2, r0
-; THUMB2: adds r2, #1
-; THUMB2: adds r1, #1
-; THUMB2: uxtb r3, r2
-; THUMB2: cmp  r3, r0
+; THUMB2:          .save   {r4, lr}
+; THUMB2:          push    {r4, lr}
+; THUMB2:          ldrb    r4, [r0]
+; THUMB2:          movs    r0, #1
+; THUMB2:          bl      f
+; THUMB2:          cmp     r4, r4
+; THUMB2:          it      ne
+; THUMB2:          popne   {r4, pc}
+; THUMB2:  .LBB8_1:
+; THUMB2:          sxtb    r0, r4
+; THUMB2:          adds    r0, #1
+; THUMB2:          mov     r1, r4
+; THUMB2:  .LBB8_2:
+; THUMB2:          adds    r1, #1
+; THUMB2:          adds    r0, #1
+; THUMB2:          uxtb    r2, r1
+; THUMB2:          cmp     r2, r4
+; THUMB2:          blt     .LBB8_2
+; THUMB2:          pop     {r4, pc}
 
   %0 = load i8, i8* %a
   %conv = sext i8 %0 to i32