Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1460,6 +1460,21 @@ return IsLegal; } +// Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on +// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags +// can be set differently by this operation. It comes down to whether +// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then +// everything is fine. If not then the optimization is wrong. Thus general +// comparisons are only valid if op2 != 0. +// +// So, finally, the only LLVM-native comparisons that don't mention C and V +// are SETEQ and SETNE. They're the only ones we can safely use CMN for in +// the absence of information about op2. +static bool isCMN(SDValue Op, ISD::CondCode CC) { + return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && + (CC == ISD::SETEQ || CC == ISD::SETNE); +} + static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG) { EVT VT = LHS.getValueType(); @@ -1482,18 +1497,8 @@ // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; - if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on - // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags - // can be set differently by this operation. It comes down to whether - // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then - // everything is fine. If not then the optimization is wrong. Thus general - // comparisons are only valid if op2 != 0. - - // So, finally, the only LLVM-native comparisons that don't mention C and V - // are SETEQ and SETNE. 
They're the only ones we can safely use CMN for in - the absence of information about op2. + if (isCMN(RHS, CC)) { + // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ? Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) && @@ -1765,6 +1770,42 @@ /// @} +/// Returns how profitable it is to fold a comparison's operand's shift and/or +/// extension operations. +static unsigned getCmpOperandFoldingProfit(SDValue Op) { + auto isSupportedExtend = [&](SDValue V) { + if (V.getOpcode() == ISD::SIGN_EXTEND_INREG) + return true; + + if (V.getOpcode() == ISD::AND) + if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + uint64_t Mask = MaskCst->getZExtValue(); + return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF); + } + + return false; + }; + + if (!Op.hasOneUse()) + return 0; + + if (isSupportedExtend(Op)) + return 1; + + unsigned Opc = Op.getOpcode(); + if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) + if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + uint64_t Shift = ShiftCst->getZExtValue(); + if (isSupportedExtend(Op.getOperand(0))) + return (Shift <= 4) ? 2 : 1; + EVT VT = Op.getValueType(); + if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63)) + return 1; + } + + return 0; +} + static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl) { @@ -1822,6 +1863,27 @@ } } } + + // Comparisons are canonicalized so that the RHS operand is simpler than the + // LHS one, the extreme case being when RHS is an immediate. However, AArch64 + // can fold some shift+extend operations on the RHS operand, so swap the + // operands if that can be done. + // + // For example: + // lsl w13, w11, #1 + // cmp w13, w12 + // can be turned into: + // cmp w12, w11, lsl #1 + if (!isa<ConstantSDNode>(RHS) || + !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) { + SDValue TheLHS = isCMN(LHS, CC) ? 
LHS.getOperand(1) : LHS; + + if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) { + std::swap(LHS, RHS); + CC = ISD::getSetCCSwappedOperands(CC); + } + } + SDValue Cmp; AArch64CC::CondCode AArch64CC; if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { Index: llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll +++ llvm/trunk/test/CodeGen/AArch64/and-mask-removal.ll @@ -179,7 +179,9 @@ ret_true: ret i1 true ; CHECK-LABEL: test16_2 -; CHECK: and +; CHECK: mov [[CST:w[0-9]+]], #16882 +; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]] +; CHECK: cmp {{.*}}, [[ADD]], uxth ; CHECK: ret } @@ -207,7 +209,9 @@ ret_true: ret i1 true ; CHECK-LABEL: test16_4 -; CHECK: and +; CHECK: mov [[CST:w[0-9]+]], #29985 +; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]] +; CHECK: cmp {{.*}}, [[ADD]], uxth ; CHECK: ret } @@ -249,7 +253,9 @@ ret_true: ret i1 true ; CHECK-LABEL: test16_7 -; CHECK: and +; CHECK: mov [[CST:w[0-9]+]], #9272 +; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]] +; CHECK: cmp {{.*}}, [[ADD]], uxth ; CHECK: ret } Index: llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll +++ llvm/trunk/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll @@ -35,8 +35,7 @@ define i1 @shifts_necmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: shifts_necmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxth ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = shl i32 %x, 16 ; 32-16 @@ -48,8 +47,7 @@ define i1 @shifts_necmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: shifts_necmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxtb ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = shl i32 %x, 24 
; 32-8 @@ -61,8 +59,7 @@ define i1 @shifts_necmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: shifts_necmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtw ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = shl i64 %x, 32 ; 64-32 @@ -74,8 +71,7 @@ define i1 @shifts_necmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: shifts_necmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxth ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = shl i64 %x, 48 ; 64-16 @@ -87,8 +83,7 @@ define i1 @shifts_necmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: shifts_necmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtb ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = shl i64 %x, 56 ; 64-8 @@ -117,8 +112,7 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxth ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, -32768 ; ~0U << (16-1) @@ -129,8 +123,7 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxtb ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, -128 ; ~0U << (8-1) @@ -141,8 +134,7 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtw ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1) @@ -153,8 +145,7 @@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxth ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret 
%tmp0 = add i64 %x, -32768 ; ~0U << (16-1) @@ -165,8 +156,7 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtb ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, -128 ; ~0U << (8-1) @@ -208,8 +198,7 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxth ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) @@ -220,8 +209,7 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxtb ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) @@ -232,8 +220,7 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtw ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -244,8 +231,7 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxth ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) @@ -256,8 +242,7 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtb ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) Index: llvm/trunk/test/CodeGen/AArch64/sat-add.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/sat-add.ll +++ 
llvm/trunk/test/CodeGen/AArch64/sat-add.ll @@ -52,11 +52,10 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) { ; CHECK-LABEL: unsigned_sat_constant_i16_using_min: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #65493 -; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: mov w8, #65493 +; CHECK-NEXT: cmp w8, w0, uxth ; CHECK-NEXT: mov w8, #-43 -; CHECK-NEXT: csel w8, w0, w8, lo +; CHECK-NEXT: csel w8, w0, w8, hi ; CHECK-NEXT: add w0, w8, #42 // =42 ; CHECK-NEXT: ret %c = icmp ult i16 %x, -43 @@ -82,11 +81,10 @@ define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) { ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w10, #65493 -; CHECK-NEXT: add w9, w0, #42 // =42 -; CHECK-NEXT: cmp w8, w10 -; CHECK-NEXT: csinv w0, w9, wzr, ls +; CHECK-NEXT: mov w9, #65493 +; CHECK-NEXT: add w8, w0, #42 // =42 +; CHECK-NEXT: cmp w9, w0, uxth +; CHECK-NEXT: csinv w0, w8, wzr, hs ; CHECK-NEXT: ret %a = add i16 %x, 42 %c = icmp ugt i16 %x, -43 Index: llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll +++ llvm/trunk/test/CodeGen/AArch64/signed-truncation-check.ll @@ -35,8 +35,7 @@ define i1 @shifts_eqcmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: shifts_eqcmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxth ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = shl i32 %x, 16 ; 32-16 @@ -48,8 +47,7 @@ define i1 @shifts_eqcmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: shifts_eqcmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxtb ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = shl i32 %x, 24 ; 32-8 @@ -61,8 +59,7 @@ define i1 @shifts_eqcmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: shifts_eqcmp_i64_i32: ; CHECK: // 
%bb.0: -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtw ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = shl i64 %x, 32 ; 64-32 @@ -74,8 +71,7 @@ define i1 @shifts_eqcmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: shifts_eqcmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxth ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = shl i64 %x, 48 ; 64-16 @@ -87,8 +83,7 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: shifts_eqcmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtb ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = shl i64 %x, 56 ; 64-8 @@ -117,8 +112,7 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxth ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, -32768 ; ~0U << (16-1) @@ -129,8 +123,7 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxtb ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, -128 ; ~0U << (8-1) @@ -141,8 +134,7 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtw ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1) @@ -153,8 +145,7 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxth ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, -32768 ; ~0U << (16-1) @@ -165,8 +156,7 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: 
add_ugecmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtb ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, -128 ; ~0U << (8-1) @@ -208,8 +198,7 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxth ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) @@ -220,8 +209,7 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cmp w0, w0, sxtb ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) @@ -232,8 +220,7 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtw ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -244,8 +231,7 @@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxth ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) @@ -256,8 +242,7 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cmp x0, w0, sxtb ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) Index: llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll +++ llvm/trunk/test/CodeGen/AArch64/swap-compare-operands.ll @@ -0,0 +1,632 @@ +; RUN: llc < %s -mtriple=arm64 | FileCheck %s + +define i1 
@testSwapCmpWithLSL64_1(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithLSL64_1: +; CHECK: cmp x1, x0, lsl #1 +; CHECK-NEXT: cset w0, gt +entry: + %shl = shl i64 %a, 1 + %cmp = icmp slt i64 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSL64_63(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithLSL64_63: +; CHECK: cmp x1, x0, lsl #63 +; CHECK-NEXT: cset w0, gt +entry: + %shl = shl i64 %a, 63 + %cmp = icmp slt i64 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSL32_1(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithLSL32_1: +; CHECK: cmp w1, w0, lsl #1 +; CHECK-NEXT: cset w0, gt +entry: + %shl = shl i32 %a, 1 + %cmp = icmp slt i32 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSL32_31(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithLSL32_31: +; CHECK: cmp w1, w0, lsl #31 +; CHECK-NEXT: cset w0, gt +entry: + %shl = shl i32 %a, 31 + %cmp = icmp slt i32 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSR64_1(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithLSR64_1: +; CHECK: cmp x1, x0, lsr #1 +; CHECK-NEXT: cset w0, gt +entry: + %lshr = lshr i64 %a, 1 + %cmp = icmp slt i64 %lshr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSR64_63(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithLSR64_63: +; CHECK: cmp x1, x0, lsr #63 +; CHECK-NEXT: cset w0, gt +entry: + %lshr = lshr i64 %a, 63 + %cmp = icmp slt i64 %lshr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSR32_1(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithLSR32_1: +; CHECK: cmp w1, w0, lsr #1 +; CHECK-NEXT: cset w0, gt +entry: + %lshr = lshr i32 %a, 1 + %cmp = icmp slt i32 %lshr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithLSR32_31(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithLSR32_31: +; CHECK: cmp w1, w0, lsr #31 +; CHECK-NEXT: cset w0, gt +entry: + %lshr = lshr i32 %a, 31 + %cmp = icmp slt i32 %lshr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithASR64_1(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithASR64_1: +; CHECK: cmp x1, x0, asr #1 +; CHECK-NEXT: cset w0, gt 
+entry: + %ashr = ashr i64 %a, 1 + %cmp = icmp slt i64 %ashr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithASR64_63(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithASR64_63: +; CHECK: cmp x1, x0, asr #63 +; CHECK-NEXT: cset w0, gt +entry: + %ashr = ashr i64 %a, 63 + %cmp = icmp slt i64 %ashr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithASR32_1(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithASR32_1: +; CHECK: cmp w1, w0, asr #1 +; CHECK-NEXT: cset w0, gt +entry: + %ashr = ashr i32 %a, 1 + %cmp = icmp slt i32 %ashr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithASR32_31(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithASR32_31: +; CHECK: cmp w1, w0, asr #31 +; CHECK-NEXT: cset w0, gt +entry: + %ashr = ashr i32 %a, 31 + %cmp = icmp slt i32 %ashr, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedZeroExtend32_64(i32 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend32_64 +; CHECK: cmp x1, w0, uxtw #2 +; CHECK-NEXT: cset w0, lo +entry: + %a64 = zext i32 %a to i64 + %shl.0 = shl i64 %a64, 2 + %cmp = icmp ugt i64 %shl.0, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend16_64 +; CHECK: cmp x1, w0, uxth #2 +; CHECK-NEXT: cset w0, lo +entry: + %a64 = zext i16 %a to i64 + %shl.0 = shl i64 %a64, 2 + %cmp = icmp ugt i64 %shl.0, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend8_64 +; CHECK: cmp x1, w0, uxtb #4 +; CHECK-NEXT: cset w0, lo +entry: + %a64 = zext i8 %a to i64 + %shl.2 = shl i64 %a64, 4 + %cmp = icmp ugt i64 %shl.2, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend8_64 +; CHECK: cmp w1, w0, uxth #3 +; CHECK-NEXT: cset w0, lo +entry: + %a32 = zext i16 %a to i32 + %shl = shl i32 %a32, 3 + %cmp = icmp ugt i32 %shl, %b + ret i1 %cmp +} + +define i1 
@testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend8_64 +; CHECK: cmp w1, w0, uxtb #4 +; CHECK-NEXT: cset w0, lo +entry: + %a32 = zext i8 %a to i32 + %shl = shl i32 %a32, 4 + %cmp = icmp ugt i32 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithTooLargeShiftedZeroExtend8_64 +; CHECK: and [[REG:w[0-9]+]], w0, #0xff +; CHECK: cmp w1, [[REG]], lsl #5 +; CHECK-NEXT: cset w0, lo +entry: + %a32 = zext i8 %a to i32 + %shl = shl i32 %a32, 5 + %cmp = icmp ugt i32 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithZeroExtend8_32(i8 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithZeroExtend8_64 +; CHECK: cmp w1, w0, uxtb +; CHECK-NEXT: cset w0, lo +entry: + %a32 = zext i8 %a to i32 + %cmp = icmp ugt i32 %a32, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedSignExtend32_64(i32 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend32_64 +; CHECK: cmp x1, w0, sxtw #2 +; CHECK-NEXT: cset w0, lo +entry: + %a64 = sext i32 %a to i64 + %shl.0 = shl i64 %a64, 2 + %cmp = icmp ugt i64 %shl.0, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedSignExtend16_64(i16 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithShiftedZeroExtend16_64 +; CHECK: cmp x1, w0, sxth #2 +; CHECK-NEXT: cset w0, lo +entry: + %a64 = sext i16 %a to i64 + %shl.0 = shl i64 %a64, 2 + %cmp = icmp ugt i64 %shl.0, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedSignExtend8_64(i8 %a, i64 %b) { +; CHECK-LABEL testSwapCmpWithShiftedSignExtend8_64 +; CHECK: cmp x1, w0, sxtb #4 +; CHECK-NEXT: cset w0, lo +entry: + %a64 = sext i8 %a to i64 + %shl.2 = shl i64 %a64, 4 + %cmp = icmp ugt i64 %shl.2, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedSignExtend16_32(i16 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithShiftedSignExtend8_64 +; CHECK: cmp w1, w0, sxth #3 +; CHECK-NEXT: cset w0, lo +entry: + %a32 = sext i16 %a to i32 + %shl = shl i32 %a32, 3 + %cmp = icmp ugt i32 
%shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithShiftedSignExtend8_32(i8 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithShiftedSignExtend8_64 +; CHECK: cmp w1, w0, sxtb #4 +; CHECK-NEXT: cset w0, lo +entry: + %a32 = sext i8 %a to i32 + %shl = shl i32 %a32, 4 + %cmp = icmp ugt i32 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithTooLargeShiftedSignExtend8_32(i8 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithTooLargeShiftedSignExtend8_64 +; CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK-NEXT: cmp w1, [[REG]], lsl #5 +; CHECK-NEXT: cset w0, lo +entry: + %a32 = sext i8 %a to i32 + %shl = shl i32 %a32, 5 + %cmp = icmp ugt i32 %shl, %b + ret i1 %cmp +} + +define i1 @testSwapCmpWithSignExtend8_32(i8 %a, i32 %b) { +; CHECK-LABEL testSwapCmpWithSignExtend8_64 +; CHECK: cmp w1, w0, sxtb +; CHECK-NEXT: cset w0, lo +entry: + %a32 = sext i8 %a to i32 + %cmp = icmp ugt i32 %a32, %b + ret i1 %cmp +} + +define i1 @testSwapCmnWithLSL64_1(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithLSL64_1: +; CHECK: cmn x1, x0, lsl #1 +; CHECK-NEXT: cset w0, ne +entry: + %shl = shl i64 %a, 1 + %na = sub i64 0, %shl + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +; Note: testing with a 62 bits shift as 63 has another optimization kicking in. +define i1 @testSwapCmnWithLSL64_62(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithLSL64_62: +; CHECK: cmn x1, x0, lsl #62 +; CHECK-NEXT: cset w0, ne +entry: + %shl = shl i64 %a, 62 + %na = sub i64 0, %shl + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +; Note: the 63 bits shift triggers a different optimization path, which leads +; to a similar result in terms of performances. We try to catch here any change +; so that this test can be adapted should the optimization be done with the +; operand swap. 
+define i1 @testSwapCmnWithLSL64_63(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithLSL64_63: +; CHECK: cmp x1, x0, lsl #63 +; CHECK-NEXT: cset w0, ne +entry: + %shl = shl i64 %a, 63 + %na = sub i64 0, %shl + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +define i1 @testSwapCmnWithLSL32_1(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithLSL32_1: +; CHECK: cmn w1, w0, lsl #1 +; CHECK-NEXT: cset w0, ne +entry: + %shl = shl i32 %a, 1 + %na = sub i32 0, %shl + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +; Note: testing with a 30 bits shift as 30 has another optimization kicking in. +define i1 @testSwapCmnWithLSL32_30(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithLSL32_30: +; CHECK: cmn w1, w0, lsl #30 +; CHECK-NEXT: cset w0, ne +entry: + %shl = shl i32 %a, 30 + %na = sub i32 0, %shl + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +; Note: the 31 bits shift triggers a different optimization path, which leads +; to a similar result in terms of performances. We try to catch here any change +; so that this test can be adapted should the optimization be done with the +; operand swap. +define i1 @testSwapCmnWithLSL32_31(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithLSL32_31: +; CHECK: cmp w1, w0, lsl #31 +; CHECK-NEXT: cset w0, ne +entry: + %shl = shl i32 %a, 31 + %na = sub i32 0, %shl + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +define i1 @testSwapCmnWithLSR64_1(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithLSR64_1: +; CHECK: cmn x1, x0, lsr #1 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = lshr i64 %a, 1 + %na = sub i64 0, %lshr + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +; Note: testing with a 62 bits shift as 63 has another optimization kicking in. 
+define i1 @testSwapCmnWithLSR64_62(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithLSR64_62: +; CHECK: cmn x1, x0, lsr #62 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = lshr i64 %a, 62 + %na = sub i64 0, %lshr + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +; Note: the 63 bits shift triggers a different optimization path, which leads +; to a similar result in terms of performances. We try to catch here any change +; so that this test can be adapted should the optimization be done with the +; operand swap. +define i1 @testSwapCmnWithLSR64_63(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithLSR64_63: +; CHECK: cmp x1, x0, asr #63 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = lshr i64 %a, 63 + %na = sub i64 0, %lshr + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +define i1 @testSwapCmnWithLSR32_1(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithLSR32_1: +; CHECK: cmn w1, w0, lsr #1 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = lshr i32 %a, 1 + %na = sub i32 0, %lshr + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +; Note: testing with a 30 bits shift as 31 has another optimization kicking in. +define i1 @testSwapCmnWithLSR32_30(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithLSR32_30: +; CHECK: cmn w1, w0, lsr #30 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = lshr i32 %a, 30 + %na = sub i32 0, %lshr + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +; Note: the 31 bits shift triggers a different optimization path, which leads +; to a similar result in terms of performances. We try to catch here any change +; so that this test can be adapted should the optimization be done with the +; operand swap. 
+define i1 @testSwapCmnWithLSR32_31(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithLSR32_31: +; CHECK: cmp w1, w0, asr #31 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = lshr i32 %a, 31 + %na = sub i32 0, %lshr + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +define i1 @testSwapCmnWithASR64_1(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithASR64_1: +; CHECK: cmn x1, x0, asr #3 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = ashr i64 %a, 3 + %na = sub i64 0, %lshr + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +; Note: testing with a 62 bits shift as 63 has another optimization kicking in. +define i1 @testSwapCmnWithASR64_62(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithASR64_62: +; CHECK: cmn x1, x0, asr #62 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = ashr i64 %a, 62 + %na = sub i64 0, %lshr + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +; Note: the 63 bits shift triggers a different optimization path, which leads +; to a similar result in terms of performances. We try to catch here any change +; so that this test can be adapted should the optimization be done with the +; operand swap. +define i1 @testSwapCmnWithASR64_63(i64 %a, i64 %b) { +; CHECK-LABEL testSwapCmnWithASR64_63: +; CHECK: cmp x1, x0, lsr #63 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = ashr i64 %a, 63 + %na = sub i64 0, %lshr + %cmp = icmp ne i64 %na, %b + ret i1 %cmp +} + +define i1 @testSwapCmnWithASR32_1(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithASR32_1: +; CHECK: cmn w1, w0, asr #1 +; CHECK-NEXT: cset w0, eq +entry: + %lshr = ashr i32 %a, 1 + %na = sub i32 0, %lshr + %cmp = icmp eq i32 %na, %b + ret i1 %cmp +} + +; Note: testing with a 30 bits shift as 31 has another optimization kicking in. 
+define i1 @testSwapCmnWithASR32_30(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithASR32_30: +; CHECK: cmn w1, w0, asr #30 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = ashr i32 %a, 30 + %na = sub i32 0, %lshr + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +; Note: the 31 bits shift triggers a different optimization path, which leads +; to a similar result in terms of performances. We try to catch here any change +; so that this test can be adapted should the optimization be done with the +; operand swap. +define i1 @testSwapCmnWithASR32_31(i32 %a, i32 %b) { +; CHECK-LABEL testSwapCmnWithASR32_31: +; CHECK: cmp w1, w0, lsr #31 +; CHECK-NEXT: cset w0, ne +entry: + %lshr = ashr i32 %a, 31 + %na = sub i32 0, %lshr + %cmp = icmp ne i32 %na, %b + ret i1 %cmp +} + +define i64 @testSwapCmpToCmnWithZeroExtend(i32 %a32, i16 %a16, i8 %a8, i64 %b64, i32 %b32) { +; CHECK-LABEL testSwapCmpToCmnWithZeroExtend: +t0: + %conv0 = zext i32 %a32 to i64 + %shl0 = shl i64 %conv0, 1 + %na0 = sub i64 0, %shl0 + %cmp0 = icmp ne i64 %na0, %b64 +; CHECK: cmn x3, w0, uxtw #1 + br i1 %cmp0, label %t1, label %end + +t1: + %conv1 = zext i16 %a16 to i64 + %shl1 = shl i64 %conv1, 4 + %na1 = sub i64 0, %shl1 + %cmp1 = icmp ne i64 %na1, %b64 +; CHECK: cmn x3, w1, uxth #4 + br i1 %cmp1, label %t2, label %end + +t2: + %conv2 = zext i8 %a8 to i64 + %shl2 = shl i64 %conv2, 3 + %na2 = sub i64 0, %shl2 + %cmp2 = icmp ne i64 %na2, %b64 +; CHECK: cmn x3, w2, uxtb #3 + br i1 %cmp2, label %t3, label %end + +t3: + %conv3 = zext i16 %a16 to i32 + %shl3 = shl i32 %conv3, 2 + %na3 = sub i32 0, %shl3 + %cmp3 = icmp ne i32 %na3, %b32 +; CHECK: cmn w4, w1, uxth #2 + br i1 %cmp3, label %t4, label %end + +t4: + %conv4 = zext i8 %a8 to i32 + %shl4 = shl i32 %conv4, 1 + %na4 = sub i32 0, %shl4 + %cmp4 = icmp ne i32 %na4, %b32 +; CHECK: cmn w4, w2, uxtb #1 + br i1 %cmp4, label %t5, label %end + +t5: + %conv5 = zext i8 %a8 to i32 + %shl5 = shl i32 %conv5, 5 + %na5 = sub i32 0, %shl5 + %cmp5 = icmp ne i32 %na5, %b32 +; 
CHECK: and [[REG:w[0-9]+]], w2, #0xff +; CHECK: cmn w4, [[REG]], lsl #5 + br i1 %cmp5, label %t6, label %end + +t6: + %conv6 = zext i8 %a8 to i32 + %na6 = sub i32 0, %conv6 + %cmp6 = icmp ne i32 %na6, %b32 +; CHECK: cmn w4, w2, uxtb + br i1 %cmp6, label %t7, label %end + +t7: + ret i64 0 + +end: + ret i64 1 +} +define i64 @testSwapCmpToCmnWithSignExtend(i32 %a32, i16 %a16, i8 %a8, i64 %b64, i32 %b32) { +; CHECK-LABEL testSwapCmpToCmnWithSignExtend: +t0: + %conv0 = sext i32 %a32 to i64 + %shl0 = shl i64 %conv0, 1 + %na0 = sub i64 0, %shl0 + %cmp0 = icmp ne i64 %na0, %b64 +; CHECK: cmn x3, w0, sxtw #1 + br i1 %cmp0, label %t1, label %end + +t1: + %conv1 = sext i16 %a16 to i64 + %shl1 = shl i64 %conv1, 4 + %na1 = sub i64 0, %shl1 + %cmp1 = icmp ne i64 %na1, %b64 +; CHECK: cmn x3, w1, sxth #4 + br i1 %cmp1, label %t2, label %end + +t2: + %conv2 = sext i8 %a8 to i64 + %shl2 = shl i64 %conv2, 3 + %na2 = sub i64 0, %shl2 + %cmp2 = icmp ne i64 %na2, %b64 +; CHECK: cmn x3, w2, sxtb #3 + br i1 %cmp2, label %t3, label %end + +t3: + %conv3 = sext i16 %a16 to i32 + %shl3 = shl i32 %conv3, 2 + %na3 = sub i32 0, %shl3 + %cmp3 = icmp ne i32 %na3, %b32 +; CHECK: cmn w4, w1, sxth #2 + br i1 %cmp3, label %t4, label %end + +t4: + %conv4 = sext i8 %a8 to i32 + %shl4 = shl i32 %conv4, 1 + %na4 = sub i32 0, %shl4 + %cmp4 = icmp ne i32 %na4, %b32 +; CHECK: cmn w4, w2, sxtb #1 + br i1 %cmp4, label %t5, label %end + +t5: + %conv5 = sext i8 %a8 to i32 + %shl5 = shl i32 %conv5, 5 + %na5 = sub i32 0, %shl5 + %cmp5 = icmp ne i32 %na5, %b32 +; CHECK: sxtb [[REG:w[0-9]+]], w2 +; CHECK: cmn w4, [[REG]], lsl #5 + br i1 %cmp5, label %t6, label %end + +t6: + %conv6 = sext i8 %a8 to i32 + %na6 = sub i32 0, %conv6 + %cmp6 = icmp ne i32 %na6, %b32 +; CHECK: cmn w4, w2, sxtb + br i1 %cmp6, label %t7, label %end + +t7: + ret i64 0 + +end: + ret i64 1 +}