Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1025,6 +1025,8 @@
 static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              SDValue &AArch64cc, SelectionDAG &DAG,
                              SDLoc dl) {
+  SDValue Cmp;
+  AArch64CC::CondCode AArch64CC;
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     EVT VT = RHS.getValueType();
     uint64_t C = RHSC->getZExtValue();
@@ -1079,9 +1081,38 @@
       }
     }
   }
-
-  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
-  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+  // The imm operand of ADDS is an unsigned 12-bit immediate in the range
+  // 0 to 4095. An i8 operand's largest immediate is 255, so it is always
+  // in range, but an i16 operand's largest immediate can exceed 4095, so
+  // it is optimized here.
+  // e.g. movz w1, #65535; ldrh w0, [x0, #0]; cmp w0, w1
+  //   => ldrsh w0, [x0, #0]; cmn w0, #1
+  // The optimization is profitable only when:
+  // 1. LHS and RHS are 16-bit zero-extended operands that can instead be
+  //    sign-extended.
+  // 2. LHS is a load with exactly one use, so the ldrh can be combined
+  //    with the sign_extend_inreg into an ldrsh.
+  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
+    if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
+        isa<LoadSDNode>(LHS) &&
+        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+        LHS.getNode()->hasNUsesOfValue(1, 0)) {
+      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+        SDValue SExt =
+            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+                        DAG.getValueType(MVT::i16));
+        Cmp = emitComparison(
+            SExt, DAG.getConstant(ValueofRHS, RHS.getValueType()), CC, dl, DAG);
+        AArch64CC = changeIntCCToAArch64CC(CC);
+        AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+        return Cmp;
+      }
+    }
+  }
+  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+  AArch64CC = changeIntCCToAArch64CC(CC);
   AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
   return Cmp;
 }
Index: test/CodeGen/AArch64/cmpwithshort.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/cmpwithshort.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s
+
+define i16 @test_1cmp_signed_1(i16* %ptr1) {
+; CHECK-LABEL: @test_1cmp_signed_1
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp eq i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
+
+define i16 @test_1cmp_signed_2(i16* %ptr1) {
+; CHECK-LABEL: @test_1cmp_signed_2
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp sge i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
+
+define i16 @test_1cmp_unsigned_1(i16* %ptr1) {
+; CHECK-LABEL: @test_1cmp_unsigned_1
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp uge i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
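
For reviewers, a minimal C++ sketch of the source pattern this patch targets;
the function is hypothetical (not taken from the patch), and the assembly in
the comments mirrors the e.g. line in the code comment above:

  // A 16-bit unsigned load compared against 0xFFFF. The zero-extended
  // constant (65535) exceeds the 12-bit ADDS immediate range, so without
  // this patch the immediate must be materialized in a register:
  //   movz w1, #65535; ldrh w0, [x0, #0]; cmp w0, w1
  // With the patch, the load is sign-extended and the constant becomes -1,
  // whose negation (1) is a legal arithmetic immediate for CMN:
  //   ldrsh w0, [x0, #0]; cmn w0, #1
  bool is_all_ones(const unsigned short *p) {
    return *p == 0xFFFF; // zero-extends to 65535; as a signed i16 this is -1
  }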