diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -552,6 +552,16 @@ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32)); }]>; +// topbitsallzero - Return true if all bits except the lowest bit are known zero +def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{ + return SDValue(N,0)->getValueType(0) == MVT::i32 && + CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31)); + }]>; +def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{ + return SDValue(N,0)->getValueType(0) == MVT::i64 && + CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63)); + }]>; + // Node definitions. def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>; @@ -2464,6 +2474,20 @@ def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; +def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), + (CSINCWr GPR32:$val, WZR, imm:$cc)>; +def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), + (CSINCXr GPR64:$val, XZR, imm:$cc)>; +def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), + (CSINCXr GPR64:$val, XZR, imm:$cc)>; + +def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), + (CSELWr WZR, GPR32:$val, imm:$cc)>; +def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), + (CSELXr XZR, GPR64:$val, imm:$cc)>; +def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), + (CSELXr XZR, GPR64:$val, imm:$cc)>; + // The inverse of the condition code from the alias instruction is what is used // in the aliased instruction. The parser all ready inverts the condition code // for these aliases. diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -623,8 +623,7 @@ ; SDISEL-NEXT: cset w8, gt ; SDISEL-NEXT: cmp x2, #2 ; SDISEL-NEXT: ccmp x2, #4, #4, lt -; SDISEL-NEXT: cset w9, gt -; SDISEL-NEXT: orr w8, w8, w9 +; SDISEL-NEXT: csinc w8, w8, wzr, le ; SDISEL-NEXT: cmp w8, #0 ; SDISEL-NEXT: csel x0, xzr, x3, ne ; SDISEL-NEXT: ret @@ -704,9 +703,8 @@ ; SDISEL-NEXT: cmp w0, #22 ; SDISEL-NEXT: mov w9, #44 ; SDISEL-NEXT: ccmp w0, w9, #0, ge -; SDISEL-NEXT: cset w9, gt +; SDISEL-NEXT: csel w8, wzr, w8, le ; SDISEL-NEXT: cmp w0, #99 -; SDISEL-NEXT: and w8, w8, w9 ; SDISEL-NEXT: mov w9, #77 ; SDISEL-NEXT: ccmp w0, w9, #4, ne ; SDISEL-NEXT: cset w9, eq diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll --- a/llvm/test/CodeGen/AArch64/bcmp.ll +++ b/llvm/test/CodeGen/AArch64/bcmp.ll @@ -493,17 +493,16 @@ define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) { ; CHECK-LABEL: bcmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp x9, x8, [sp] -; CHECK-NEXT: ldp x10, x11, [sp, #16] -; CHECK-NEXT: cmp x10, x9 -; CHECK-NEXT: ccmp x11, x8, #0, eq -; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: cmp x2, x0 ; CHECK-NEXT: ccmp x3, x1, #0, eq +; CHECK-NEXT: ldp x9, x8, [sp] ; CHECK-NEXT: ccmp x6, x4, #0, eq +; CHECK-NEXT: ldp x10, x11, [sp, #16] ; CHECK-NEXT: ccmp x7, x5, #0, eq -; CHECK-NEXT: cset w9, ne -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: cmp x10, x9 +; CHECK-NEXT: ccmp x11, x8, #0, eq +; CHECK-NEXT: csinc w0, w12, wzr, eq ; CHECK-NEXT: ret %xor0 = xor i128 %b0, %a0 %xor1 = xor i128 %b1, %a1 diff --git a/llvm/test/CodeGen/AArch64/csel-andor-onebit.ll b/llvm/test/CodeGen/AArch64/csel-andor-onebit.ll --- a/llvm/test/CodeGen/AArch64/csel-andor-onebit.ll +++ b/llvm/test/CodeGen/AArch64/csel-andor-onebit.ll @@ -4,10 +4,9 @@ define i32 @ori32i32_eq(i32 %x, i32 %y) { ; CHECK-LABEL: ori32i32_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: and w8, w0, #0x1 -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: csinc w0, w8, wzr, ne ; CHECK-NEXT: ret %xa = and i32 %x, 1 %c = icmp eq i32 %y, 0 @@ -19,10 +18,9 @@ define i32 @ori32_eq_c(i32 %x, i32 %y) { ; CHECK-LABEL: ori32_eq_c: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: and w8, w0, #0x1 -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: csinc w0, w8, wzr, ne ; CHECK-NEXT: ret %xa = and i32 %x, 1 %c = icmp eq i32 %y, 0 @@ -34,10 +32,9 @@ define i32 @ori32i64_eq(i32 %x, i64 %y) { ; CHECK-LABEL: ori32i64_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: and w8, w0, #0x1 -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: csinc w0, w8, wzr, ne ; CHECK-NEXT: ret %xa = and i32 %x, 1 %c = icmp eq i64 %y, 0 @@ -49,10 +46,9 @@ define i32 @ori32_sgt(i32 %x, i32 %y) { ; CHECK-LABEL: ori32_sgt: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: and w8, w0, #0x1 -; CHECK-NEXT: cset w9, gt -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: csinc w0, w8, wzr, le ; CHECK-NEXT: ret %xa = and i32 %x, 1 %c = icmp sgt i32 %y, 0 @@ -83,8 +79,7 @@ ; CHECK-NEXT: tst w0, #0xff ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: tst w1, #0xff -; CHECK-NEXT: cset w9, ne -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: csel w0, wzr, w8, eq ; CHECK-NEXT: ret %xc = icmp eq i8 %x, 0 %xa = zext i1 %xc to i32 @@ -113,10 +108,9 @@ define i64 @ori64i32_eq(i64 %x, i32 %y) { ; CHECK-LABEL: ori64i32_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: csinc x0, x8, xzr, ne ; CHECK-NEXT: ret %xa = and i64 %x, 1 %c = icmp eq i32 %y, 0 @@ -128,10 +122,9 @@ define i64 @ori64i64_eq(i64 %x, i64 %y) { ; CHECK-LABEL: ori64i64_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: csinc x0, x8, xzr, ne ; CHECK-NEXT: ret %xa = and i64 %x, 1 %c = icmp eq i64 %y, 0 @@ -143,10 +136,9 @@ define i64 @ori64_eq_c(i64 %x, i32 %y) { ; CHECK-LABEL: ori64_eq_c: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr x0, x9, x8 +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: csinc x0, x8, xzr, ne ; CHECK-NEXT: ret %xa = and i64 %x, 1 %c = icmp eq i32 %y, 0 @@ -161,8 +153,7 @@ ; CHECK-NEXT: tst w0, #0xff ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: tst w1, #0xff -; CHECK-NEXT: cset w9, ne -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: csel w0, wzr, w8, eq ; CHECK-NEXT: ret %xc = icmp eq i8 %x, 0 %xa = zext i1 %xc to i64 diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll --- a/llvm/test/CodeGen/AArch64/i128-math.ll +++ b/llvm/test/CodeGen/AArch64/i128-math.ll @@ -262,22 +262,20 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_checked_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x8, x3, x0 -; CHECK-NEXT: umulh x9, x0, x2 -; CHECK-NEXT: madd x8, x1, x2, x8 -; CHECK-NEXT: umulh x10, x1, x2 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: cset w9, hs ; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: umulh x8, x1, x2 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: mul x9, x3, x0 +; CHECK-NEXT: madd x9, x1, x2, x9 +; CHECK-NEXT: ccmp xzr, x8, #0, eq +; CHECK-NEXT: umulh x8, x3, x0 +; CHECK-NEXT: ccmp xzr, x8, #0, eq +; CHECK-NEXT: umulh x8, x0, x2 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: ccmp xzr, x10, #0, eq ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: orr w9, w10, w9 -; CHECK-NEXT: eor w2, w9, #0x1 +; CHECK-NEXT: adds x1, x8, x9 +; CHECK-NEXT: csinc w8, w10, wzr, lo +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -292,21 +290,19 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_overflowing_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x8, x3, x0 -; CHECK-NEXT: umulh x9, x0, x2 -; CHECK-NEXT: madd x8, x1, x2, x8 -; CHECK-NEXT: umulh x10, x1, x2 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: cset w9, hs ; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: umulh x8, x1, x2 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: ccmp xzr, x8, #0, eq +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: madd x8, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: umulh x9, x0, x2 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: ccmp xzr, x10, #0, eq ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: orr w2, w10, w9 +; CHECK-NEXT: adds x1, x9, x8 +; CHECK-NEXT: csinc w2, w10, wzr, lo ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -320,19 +316,18 @@ define i128 @u128_saturating_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_saturating_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x8, x3, x0 -; CHECK-NEXT: umulh x9, x0, x2 -; CHECK-NEXT: madd x8, x1, x2, x8 -; CHECK-NEXT: umulh x10, x1, x2 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: cset w9, hs ; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: umulh x8, x1, x2 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: umulh x10, x3, x0 -; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: ccmp xzr, x8, #0, eq +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: madd x8, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: umulh x9, x0, x2 ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: orr w9, w10, w9 +; CHECK-NEXT: adds x8, x9, x8 +; CHECK-NEXT: csinc w9, w10, wzr, lo ; CHECK-NEXT: mul x10, x0, x2 ; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csinv x0, x10, xzr, eq diff --git a/llvm/test/CodeGen/AArch64/mul-cmp.ll b/llvm/test/CodeGen/AArch64/mul-cmp.ll --- a/llvm/test/CodeGen/AArch64/mul-cmp.ll +++ b/llvm/test/CodeGen/AArch64/mul-cmp.ll @@ -8,11 +8,10 @@ define i1 @mul_nsw_eq0_i8(i8 %x, i8 %y) { ; CHECK-LABEL: mul_nsw_eq0_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: tst w1, #0xff -; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: tst w0, #0xff -; CHECK-NEXT: cset w9, eq -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: tst w1, #0xff +; CHECK-NEXT: csinc w0, w8, wzr, ne ; CHECK-NEXT: ret %m = mul nsw i8 %x, %y %r = icmp eq i8 %m, 0 @@ -50,11 +49,10 @@ define i1 @mul_nsw_ne0_i16(i16 %x, i16 %y) { ; CHECK-LABEL: mul_nsw_ne0_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: tst w1, #0xffff -; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: tst w0, #0xffff -; CHECK-NEXT: cset w9, ne -; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: tst w1, #0xffff +; CHECK-NEXT: csel w0, wzr, w8, eq ; CHECK-NEXT: ret %m = mul nsw i16 %x, %y %r = icmp ne i16 %m, 0 diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -4,21 +4,19 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; AARCH-LABEL: muloti_test: ; AARCH: // %bb.0: // %start -; AARCH-NEXT: mul x8, x3, x0 -; AARCH-NEXT: umulh x9, x0, x2 -; AARCH-NEXT: madd x8, x1, x2, x8 -; AARCH-NEXT: umulh x10, x1, x2 -; AARCH-NEXT: adds x8, x9, x8 -; AARCH-NEXT: cset w9, hs ; AARCH-NEXT: cmp x1, #0 +; AARCH-NEXT: umulh x8, x1, x2 ; AARCH-NEXT: ccmp x3, #0, #4, ne -; AARCH-NEXT: mov x1, x8 -; AARCH-NEXT: ccmp xzr, x10, #0, eq -; AARCH-NEXT: umulh x10, x3, x0 +; AARCH-NEXT: umulh x9, x3, x0 +; AARCH-NEXT: ccmp xzr, x8, #0, eq +; AARCH-NEXT: mul x8, x3, x0 +; AARCH-NEXT: madd x8, x1, x2, x8 +; AARCH-NEXT: ccmp xzr, x9, #0, eq +; AARCH-NEXT: umulh x9, x0, x2 ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-NEXT: ccmp xzr, x10, #0, eq ; AARCH-NEXT: cset w10, ne -; AARCH-NEXT: orr w2, w10, w9 +; AARCH-NEXT: adds x1, x9, x8 +; AARCH-NEXT: csinc w2, w10, wzr, lo ; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -321,16 +321,22 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind { ; CHECK-LABEL: umulo_v2i128: ; CHECK: // %bb.0: +; CHECK-NEXT: cmp x3, #0 +; CHECK-NEXT: umulh x8, x3, x6 +; CHECK-NEXT: ccmp x7, #0, #4, ne +; CHECK-NEXT: umulh x9, x7, x2 +; CHECK-NEXT: umulh x11, x5, x0 +; CHECK-NEXT: ccmp xzr, x8, #0, eq ; CHECK-NEXT: mul x8, x7, x2 -; CHECK-NEXT: umulh x9, x2, x6 ; CHECK-NEXT: madd x8, x3, x6, x8 -; CHECK-NEXT: umulh x10, x3, x6 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: umulh x9, x2, x6 +; CHECK-NEXT: cset w10, ne ; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: umulh x11, x7, x2 -; CHECK-NEXT: cset w9, hs -; CHECK-NEXT: cmp x3, #0 -; CHECK-NEXT: ccmp x7, #0, #4, ne -; CHECK-NEXT: umulh x13, x1, x4 +; CHECK-NEXT: csinc w9, w10, wzr, lo +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x5, #0, #4, ne +; CHECK-NEXT: umulh x10, x1, x4 ; CHECK-NEXT: ccmp xzr, x10, #0, eq ; CHECK-NEXT: mul x10, x5, x0 ; CHECK-NEXT: madd x10, x1, x4, x10 @@ -338,16 +344,8 @@ ; CHECK-NEXT: umulh x11, x0, x4 ; CHECK-NEXT: cset w12, ne ; CHECK-NEXT: adds x10, x11, x10 -; CHECK-NEXT: cset w11, hs -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: ccmp x5, #0, #4, ne -; CHECK-NEXT: orr w9, w12, w9 +; CHECK-NEXT: csinc w11, w12, wzr, lo ; CHECK-NEXT: mul x12, x0, x4 -; CHECK-NEXT: ccmp xzr, x13, #0, eq -; CHECK-NEXT: umulh x13, x5, x0 -; CHECK-NEXT: ccmp xzr, x13, #0, eq -; CHECK-NEXT: cset w13, ne -; CHECK-NEXT: orr w11, w13, w11 ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: ldr x11, [sp] ; CHECK-NEXT: mov v0.s[1], w9