Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1025,6 +1025,8 @@
 static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              SDValue &AArch64cc, SelectionDAG &DAG,
                              SDLoc dl) {
+  SDValue Cmp;
+  AArch64CC::CondCode AArch64CC;
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     EVT VT = RHS.getValueType();
     uint64_t C = RHSC->getZExtValue();
@@ -1079,9 +1081,38 @@
       }
     }
   }
-
-  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
-  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+  // The imm operand of ADDS is an unsigned 12-bit immediate in the range
+  // 0 to 4095. An i8 operand's largest immediate is 255, so it is always
+  // in range, but an i16 operand's largest immediate can exceed 4095, so
+  // it is optimized here.
+  // e.g. movz w1, #65535; ldrh w0, [x0, #0]; cmp w0, w1
+  //   => ldrsh w0, [x0, #0]; cmn w0, #1
+  // The optimization is profitable only when:
+  // 1. LHS and RHS are 16-bit zero-extended operands that can instead be
+  //    sign-extended.
+  // 2. LHS is a load with exactly one use, so the ldrh can be combined
+  //    with the sign_extend_inreg into an ldrsh.
+  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
+    if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
+        isa<LoadSDNode>(LHS) &&
+        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+        LHS.getNode()->hasNUsesOfValue(1, 0)) {
+      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+        SDValue SExt =
+            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+                        DAG.getValueType(MVT::i16));
+        Cmp = emitComparison(
+            SExt, DAG.getConstant(ValueofRHS, RHS.getValueType()), CC, dl, DAG);
+        AArch64CC = changeIntCCToAArch64CC(CC);
+        AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+        return Cmp;
+      }
+    }
+  }
+  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+  AArch64CC = changeIntCCToAArch64CC(CC);
   AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
   return Cmp;
 }
Index: test/CodeGen/AArch64/cmpwithshort.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/cmpwithshort.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s
+
+define i16 @test_1cmp_signed_1(i16* %ptr1) {
+; CHECK-LABEL: @test_1cmp_signed_1
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp eq i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
+
+define i16 @test_1cmp_signed_2(i16* %ptr1) {
+; CHECK-LABEL: @test_1cmp_signed_2
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp sge i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
+
+define i16 @test_1cmp_unsigned_1(i16* %ptr1) {
+; CHECK-LABEL: @test_1cmp_unsigned_1
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp uge i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
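
For reviewers, a minimal C++ sketch of the source pattern this patch targets;
the function is hypothetical (not taken from the patch), and the assembly in
the comments mirrors the e.g. line in the code comment above:

  // A 16-bit unsigned load compared against 0xFFFF. The zero-extended
  // constant (65535) exceeds the 12-bit ADDS immediate range, so without
  // this patch the immediate must be materialized in a register:
  //   movz w1, #65535; ldrh w0, [x0, #0]; cmp w0, w1
  // With the patch, the load is sign-extended and the constant becomes -1,
  // whose negation (1) is a legal arithmetic immediate for CMN:
  //   ldrsh w0, [x0, #0]; cmn w0, #1
  bool is_all_ones(const unsigned short *p) {
    return *p == 0xFFFF; // zero-extends to 65535; as a signed i16 this is -1
  }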