diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -869,6 +869,15 @@
   /// integer type VT, by either zero-extending or truncating it.
   SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
 
+  /// Check if \p Val is a zero extension in-reg. The zero-extend-in-reg
+  /// operation is rendered as an AND instruction with a suitable constant
+  /// that sets the high bits of the register to zero. For example, a zero
+  /// extend in-reg of an i8 value loaded as anyext into an i32 value is
+  /// rendered as:
+  ///
+  ///   i32 = AND i32 (load anyext from i8), i32 Constant<255>
+  bool isZeroExtendInReg(SDValue Val) const;
+
   /// Return the expression required to zero extend the Op
   /// value assuming it was the smaller SrcTy value.
   SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11279,16 +11279,31 @@
     }
   }
 
+  // Checks if the uses of a load are extensions that differ in signedness.
+  auto UsesDifferInSignExtension = [this](LoadSDNode *Load) -> bool {
+    if (Load->use_size() != 2)
+      return false;
+
+    SDNode::use_iterator UseIt = Load->use_begin();
+    SDNode *UseOne = *UseIt;
+    SDNode *UseTwo = *++UseIt;
+    if (UseOne->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        this->DAG.isZeroExtendInReg(SDValue(UseTwo, 0)))
+      return true;
+
+    return false;
+  };
+
   // fold (sext_inreg (extload x)) -> (sextload x)
   // If sextload is not supported by target, we can only do the combine when
   // load has one use. Doing otherwise can block folding the extload with other
   // extends that the target does support.
-  if (ISD::isEXTLoad(N0.getNode()) &&
-      ISD::isUNINDEXEDLoad(N0.getNode()) &&
+  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) ||
-       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) &&
+      !UsesDifferInSignExtension(cast<LoadSDNode>(N0))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1168,6 +1168,54 @@
   return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
 }
 
+bool SelectionDAG::isZeroExtendInReg(SDValue Val) const {
+  if (Val->getOpcode() != ISD::AND)
+    return false;
+
+  // The LHS must be an extended load (any of sext/zext/anyext) from
+  // the input type NarrowVT to the WideVT used by the AND.
+  SDValue LHS = Val.getOperand(0);
+  if (!ISD::isEXTLoad(LHS.getNode()))
+    return false;
+
+  EVT NarrowVT = cast<LoadSDNode>(LHS)->getMemoryVT();
+  EVT WideVT = Val.getValueType();
+
+  // NarrowVT and WideVT must either both be vectors or both be scalars.
+  if (NarrowVT.isVector() && !WideVT.isVector())
+    return false;
+  if (!NarrowVT.isVector() && WideVT.isVector())
+    return false;
+
+  // If vectors, they must have the same element count.
+  if (NarrowVT.isVector() && WideVT.isVector() &&
+      (NarrowVT.getVectorElementCount() != WideVT.getVectorElementCount()))
+    return false;
+
+  // The type we are extending from must be smaller than the one used
+  // in the AND.
+  if (!NarrowVT.bitsLE(WideVT))
+    return false;
+
+  // If the types are the same there is no zero extension going on.
+  if (NarrowVT == WideVT)
+    return false;
+
+  // RHS must be the constant that clears all the bits above the narrow
+  // type: 255 (0xff) for i8, 65535 (0xffff) for i16, and so on.
+  auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(1));
+  if (!C)
+    return false;
+
+  APInt Imm = APInt::getLowBitsSet(WideVT.getScalarSizeInBits(),
+                                   NarrowVT.getScalarSizeInBits());
+  if (C->getSExtValue() != Imm)
+    return false;
+
+  return true;
+}
+
 SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
   EVT OpVT = Op.getValueType();
   assert(VT.isInteger() && OpVT.isInteger() &&
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5887,6 +5887,16 @@
     // (a.k.a. TST) and the test in the test bit and branch instruction
     // becomes redundant. This would also increase register pressure.
     uint64_t Mask = LHS.getValueSizeInBits() - 1;
+    // If LHS is a sext_inreg, we can check the sign bit of the
+    // original unextended data.
+    if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      Mask = cast<VTSDNode>(LHS.getOperand(1))
+                 ->getVT()
+                 .getSizeInBits()
+                 .getFixedSize() -
+             1;
+      LHS = LHS.getOperand(0);
+    }
     return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
                        DAG.getConstant(Mask, dl, MVT::i64), Dest);
   }
@@ -5897,6 +5907,16 @@
     // (a.k.a. TST) and the test in the test bit and branch instruction
     // becomes redundant. This would also increase register pressure.
     uint64_t Mask = LHS.getValueSizeInBits() - 1;
+    // If LHS is a sext_inreg, we can check the sign bit of the
+    // original unextended data.
+    if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      Mask = cast<VTSDNode>(LHS.getOperand(1))
+                 ->getVT()
+                 .getSizeInBits()
+                 .getFixedSize() -
+             1;
+      LHS = LHS.getOperand(0);
+    }
     return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
                        DAG.getConstant(Mask, dl, MVT::i64), Dest);
   }
diff --git a/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll b/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple aarch64-linux-gnu -o - -asm-verbose=0 < %s | FileCheck %s
+
+define i32 @f_i32_i8(i8* %p) nounwind {
+; CHECK-LABEL: f_i32_i8:
+; CHECK-NEXT: ldrb w[[N:[0-9]+]], [x0]
+; CHECK-NEXT: tbnz w[[N]], #7, .LBB[[BB:.*]]
+; CHECK-NEXT: add w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: mul w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+entry:
+  %0 = load i8, i8* %p
+  %conv = zext i8 %0 to i32
+  %cmp = icmp sgt i8 %0, -1
+  br i1 %cmp, label %A, label %B
+
+A:
+  %retval2 = add i32 %conv, %conv
+  ret i32 %retval2
+
+B:
+  %retval1 = mul i32 %conv, %conv
+  ret i32 %retval1
+}
+
+define i32 @f_i32_i16(i16* %p) nounwind {
+; CHECK-LABEL: f_i32_i16:
+; CHECK-NEXT: ldrh w[[N:[0-9]+]], [x0]
+; CHECK-NEXT: tbnz w[[N]], #15, .LBB[[BB:.*]]
+; CHECK-NEXT: add w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: mul w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+entry:
+  %0 = load i16, i16* %p
+  %conv = zext i16 %0 to i32
+  %cmp = icmp sgt i16 %0, -1
+  br i1 %cmp, label %A, label %B
+
+A:
+  %retval2 = add i32 %conv, %conv
+  ret i32 %retval2
+
+B:
+  %retval1 = mul i32 %conv, %conv
+  ret i32 %retval1
+}
+
+define i32 @g_i32_i8(i8* %p) nounwind {
+; CHECK-LABEL: g_i32_i8:
+; CHECK-NEXT: ldrb w0, [x0]
+; CHECK-NEXT: tbnz w0, #7, .LBB[[BB:.*]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: lsl w0, w0, #1
+; CHECK-NEXT: ret
+entry:
+  %0 = load i8, i8* %p, align 1
+  %conv = zext i8 %0 to i32
+  %cmp1 = icmp sgt i8 %0, -1
+  br i1 %cmp1, label %return, label %B
+
+B: ; preds = %entry
+  %add = shl nuw nsw i32 %conv, 1
+  ret i32 %add
+
+return: ; preds = %entry
+  ret i32 %conv
+}
+
+define i32 @g_i32_i16(i16* %p) nounwind {
+; CHECK-LABEL: g_i32_i16:
+; CHECK-NEXT: ldrh w0, [x0]
+; CHECK-NEXT: tbnz w0, #15, .LBB[[BB:.*]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: lsl w0, w0, #1
+; CHECK-NEXT: ret
+entry:
+  %0 = load i16, i16* %p, align 1
+  %conv = zext i16 %0 to i32
+  %cmp1 = icmp sgt i16 %0, -1
+  br i1 %cmp1, label %return, label %B
+
+B: ; preds = %entry
+  %add = shl nuw nsw i32 %conv, 1
+  ret i32 %add
+
+return: ; preds = %entry
+  ret i32 %conv
+}
+
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -82,19 +82,19 @@
 ; ENABLE-NEXT: bhi .LBB0_7
 ; ENABLE-NEXT: @ %bb.14: @ %while.body24.preheader
 ; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1
-; ENABLE-NEXT: sub r3, r3, #2
+; ENABLE-NEXT: sub lr, r3, #2
 ; ENABLE-NEXT: .LBB0_15: @ %while.body24
 ; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1
 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2
-; ENABLE-NEXT: mov r0, r3
-; ENABLE-NEXT: cmp r3, r2
+; ENABLE-NEXT: mov r0, lr
+; ENABLE-NEXT: cmp lr, r2
 ; ENABLE-NEXT: bls .LBB0_7
 ; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge
 ; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
-; ENABLE-NEXT: mov r3, r0
-; ENABLE-NEXT: ldrsb lr, [r3], #-1
-; ENABLE-NEXT: cmn lr, #1
-; ENABLE-NEXT: uxtb r12, lr
+; ENABLE-NEXT: mov lr, r0
+; ENABLE-NEXT: ldrb r12, [lr], #-1
+; ENABLE-NEXT: sxtb r3, r12
+; ENABLE-NEXT: cmn r3, #1
 ; ENABLE-NEXT: bgt .LBB0_7
 ; ENABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge
 ; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
@@ -172,19 +172,19 @@
 ; DISABLE-NEXT: bhi .LBB0_7
 ; DISABLE-NEXT: @ %bb.14: @ %while.body24.preheader
 ; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1
-; DISABLE-NEXT: sub r3, r3, #2
+; DISABLE-NEXT: sub lr, r3, #2
 ; DISABLE-NEXT: .LBB0_15: @ %while.body24
 ; DISABLE-NEXT: @ Parent Loop BB0_7 Depth=1
 ; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2
-; DISABLE-NEXT: mov r0, r3
-; DISABLE-NEXT: cmp r3, r2
+; DISABLE-NEXT: mov r0, lr
+; DISABLE-NEXT: cmp lr, r2
 ; DISABLE-NEXT: bls .LBB0_7
 ; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge
 ; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
-; DISABLE-NEXT: mov r3, r0
-; DISABLE-NEXT: ldrsb lr, [r3], #-1
-; DISABLE-NEXT: cmn lr, #1
-; DISABLE-NEXT: uxtb r12, lr
+; DISABLE-NEXT: mov lr, r0
+; DISABLE-NEXT: ldrb r12, [lr], #-1
+; DISABLE-NEXT: sxtb r3, r12
+; DISABLE-NEXT: cmn r3, #1
 ; DISABLE-NEXT: bgt .LBB0_7
 ; DISABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge
 ; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
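For reference, the two-use pattern that UsesDifferInSignExtension and the new SelectionDAG::isZeroExtendInReg hook look for can be reduced to a minimal IR sketch, condensed from f_i32_i8 in the new AArch64 test above (the function and value names here are illustrative only). A single narrow load has one zero-extending use, which the DAG renders as an AND of the anyext load with 255, and one signed use that only tests the sign bit, which becomes a sign_extend_inreg feeding TBNZ on AArch64:

define i32 @zext_and_signed_compare(i8* %p) {
entry:
  ; One narrow load ...
  %v = load i8, i8* %p
  ; ... with a zero-extending use (an AND-based zero extend in-reg in the DAG) ...
  %zext = zext i8 %v to i32
  ; ... and a signed use that only needs the sign bit of the i8 value.
  %isnonneg = icmp sgt i8 %v, -1
  br i1 %isnonneg, label %pos, label %neg

pos:
  ret i32 %zext

neg:
  %sq = mul i32 %zext, %zext
  ret i32 %sq
}

Without the extra !UsesDifferInSignExtension condition, visitSIGN_EXTEND_INREG would fold the signed use into a sextload and the zero-extending use would then need a separate masking instruction, as the old uxtb in the ARM test shows; with it, the anyext load is kept and the TBNZ/TBZ change tests the sign bit of the unextended value directly.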