diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17647,6 +17647,54 @@ return SDValue(); } +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) x)) => (CSEL l r cc2 cond) +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) y)) => (CSEL l r !cc2 cond) +// Where cc1 is any reflexive relation (eg EQ) + +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) x)) => (CSEL l r !cc2 cond) +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) y)) => (CSEL l r cc2 cond) +// Where cc1 is any irreflexive relation (eg NE) +static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) { + SDValue L = Op->getOperand(0); + SDValue R = Op->getOperand(1); + AArch64CC::CondCode OpCC = + static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2)); + + SDValue OpCmp = Op->getOperand(3); + if (!isCMP(OpCmp)) + return SDValue(); + + SDValue CmpLHS = OpCmp.getOperand(0); + SDValue CmpRHS = OpCmp.getOperand(1); + + if (CmpRHS.getOpcode() == AArch64ISD::CSEL) + std::swap(CmpLHS, CmpRHS); + else if (CmpLHS.getOpcode() != AArch64ISD::CSEL) + return SDValue(); + + SDValue X = CmpLHS->getOperand(0); + SDValue Y = CmpLHS->getOperand(1); + AArch64CC::CondCode CC = + static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2)); + SDValue Cond = CmpLHS->getOperand(3); + + if (CmpRHS == Y) + CC = AArch64CC::getInvertedCondCode(CC); + else if (CmpRHS != X) + return SDValue(); + + if (AArch64CC::isIrreflexive(OpCC)) + CC = AArch64CC::getInvertedCondCode(CC); + else if (!AArch64CC::isReflexive(OpCC)) + return SDValue(); + + SDLoc DL(Op); + EVT VT = Op->getValueType(0); + + SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond); +} + // Optimize CSEL instructions static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -17655,6 +17703,9 @@ if (N->getOperand(0) == N->getOperand(1)) return N->getOperand(0); + if 
(SDValue R = foldCSELOfCSEL(N, DAG)) + return R; + return performCONDCombine(N, DCI, DAG, 2, 3); } diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -331,6 +331,41 @@ case LE: return Z; // Z == 1 || N != V } } + +/// Return true if Code is a reflexive relationship: +/// forall x. (CSET Code (CMP x x)) == 1 +inline static bool isReflexive(CondCode Code) { + switch (Code) { + case EQ: + case HS: + case PL: + case LS: + case GE: + case LE: + case AL: + case NV: + return true; + default: + return false; + } +} + +/// Return true if Code is an irreflexive relationship: +/// forall x. (CSET Code (CMP x x)) == 0 +inline static bool isIrreflexive(CondCode Code) { + switch (Code) { + case NE: + case LO: + case MI: + case HI: + case LT: + case GT: + return true; + default: + return false; + } +} + } // end namespace AArch64CC struct SysAlias { diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -396,10 +396,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: cset w9, gt ; CHECK-NEXT: csel w9, w10, w9, eq @@ -459,10 +458,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: cset w9, gt ; CHECK-NEXT: csel w9, w10, w9, eq @@ -528,10 +526,9 
@@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: cset w9, gt ; CHECK-NEXT: csel w9, w10, w9, eq diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -397,37 +397,35 @@ ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmp x9, #0 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: csel w9, w10, w9, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: csel x10, x19, xzr, lt +; CHECK-NEXT: cset w12, lt ; CHECK-NEXT: cmp x11, #0 ; CHECK-NEXT: cset w11, gt ; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w9, gt -; CHECK-NEXT: csel w9, w12, w9, eq +; CHECK-NEXT: cmp w11, #0 +; CHECK-NEXT: csel x10, x10, xzr, ne ; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csel x8, x8, xzr, ne -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne 
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -511,37 +509,35 @@ ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinc x8, x1, xzr, lt -; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: csinc x9, x1, xzr, lt +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmp x9, #0 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: csel w9, w10, w9, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: csel x10, x19, xzr, lt +; CHECK-NEXT: cset w12, lt ; CHECK-NEXT: cmp x11, #0 ; CHECK-NEXT: cset w11, gt ; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: csel w8, w12, w8, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x8, x9, xzr, ne ; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: csel x10, x10, xzr, ne +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel x8, x8, xzr, ne ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, 
#48 ; CHECK-NEXT: ret @@ -637,37 +633,35 @@ ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinc x8, x1, xzr, lt -; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: csinc x9, x1, xzr, lt +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmp x9, #0 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: csel w9, w10, w9, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: csel x10, x19, xzr, lt +; CHECK-NEXT: cset w12, lt ; CHECK-NEXT: cmp x11, #0 ; CHECK-NEXT: cset w11, gt ; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: csel w8, w12, w8, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x8, x9, xzr, ne ; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: csel x10, x10, xzr, ne +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel x8, x8, xzr, ne ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ 
b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -23,7 +23,6 @@ ; CHECK-NEXT: fcvtzs w8, s0 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -201,7 +200,6 @@ ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -382,7 +380,6 @@ ; CHECK-CVT-NEXT: fcvtzs w8, s0 ; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csel w8, w8, wzr, lt -; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: and w0, w8, #0x1 ; CHECK-CVT-NEXT: ret @@ -392,7 +389,6 @@ ; CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: cmp w8, #0 ; CHECK-FP16-NEXT: csel w8, w8, wzr, lt -; CHECK-FP16-NEXT: cmp w8, #0 ; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge ; CHECK-FP16-NEXT: and w0, w8, #0x1 ; CHECK-FP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1306,11 +1306,9 @@ ; CHECK-NEXT: fcvtzs w8, d1 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csel w9, w9, wzr, lt -; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csinv w9, w9, wzr, ge ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 @@ -2062,57 +2060,49 @@ ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzs w13, s0 ; CHECK-CVT-NEXT: fcvtzs w8, s2 -; CHECK-CVT-NEXT: mov s2, v1.s[2] -; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzs w10, s3 +; 
CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csel w8, w8, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w10, s2 -; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzs w12, s2 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: csel w9, w9, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w11, s1 -; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge ; CHECK-CVT-NEXT: cmp w10, #0 ; CHECK-CVT-NEXT: csel w10, w10, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmp w10, #0 -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt ; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt -; CHECK-CVT-NEXT: cmp w12, #0 +; CHECK-CVT-NEXT: fcvtzs w9, s1 ; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csel w13, w13, wzr, lt -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csinv w9, w13, wzr, ge -; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csel w8, w13, wzr, lt +; CHECK-CVT-NEXT: csel w8, w9, wzr, lt ; CHECK-CVT-NEXT: fcvtzs w9, s0 -; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov v1.s[1], w12 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: fmov s1, w13 ; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: mov v1.s[1], w12 ; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: mov v1.s[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: csel w8, w9, wzr, lt +; 
CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: mov v2.s[3], w11 ; CHECK-CVT-NEXT: mov v1.s[3], w8 ; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll --- a/llvm/test/CodeGen/AArch64/i128-math.ll +++ b/llvm/test/CodeGen/AArch64/i128-math.ll @@ -68,10 +68,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x0, x2 ; CHECK-NEXT: adcs x9, x1, x3 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x0, x8, xzr, eq -; CHECK-NEXT: csinv x1, x9, xzr, eq +; CHECK-NEXT: csinv x0, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo ; CHECK-NEXT: ret %1 = tail call i128 @llvm.uadd.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -126,10 +124,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbcs x9, x1, x3 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x0, xzr, x8, ne -; CHECK-NEXT: csel x1, xzr, x9, ne +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo ; CHECK-NEXT: ret %1 = tail call i128 @llvm.usub.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -185,11 +181,9 @@ ; CHECK-NEXT: adds x8, x0, x2 ; CHECK-NEXT: adcs x9, x1, x3 ; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: cset w11, vs -; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, ne -; CHECK-NEXT: csel x1, x11, x9, ne +; CHECK-NEXT: csel x0, x10, x8, vs +; CHECK-NEXT: csel x1, x11, x9, vs ; CHECK-NEXT: ret %1 = tail call i128 @llvm.sadd.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -245,11 +239,9 @@ ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbcs x9, x1, x3 ; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: cset w11, vs -; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, ne -; CHECK-NEXT: csel x1, x11, x9, ne +; CHECK-NEXT: csel x0, x10, x8, vs +; CHECK-NEXT: csel x1, x11, x9, vs ; CHECK-NEXT: ret %1 = tail call i128 @llvm.ssub.sat.i128(i128 %x, i128 %y) ret i128 
%1 diff --git a/llvm/test/CodeGen/AArch64/i256-math.ll b/llvm/test/CodeGen/AArch64/i256-math.ll --- a/llvm/test/CodeGen/AArch64/i256-math.ll +++ b/llvm/test/CodeGen/AArch64/i256-math.ll @@ -70,12 +70,10 @@ ; CHECK-NEXT: adcs x9, x1, x5 ; CHECK-NEXT: adcs x10, x2, x6 ; CHECK-NEXT: adcs x11, x3, x7 -; CHECK-NEXT: cset w12, hs -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csinv x0, x8, xzr, eq -; CHECK-NEXT: csinv x1, x9, xzr, eq -; CHECK-NEXT: csinv x2, x10, xzr, eq -; CHECK-NEXT: csinv x3, x11, xzr, eq +; CHECK-NEXT: csinv x0, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo +; CHECK-NEXT: csinv x2, x10, xzr, lo +; CHECK-NEXT: csinv x3, x11, xzr, lo ; CHECK-NEXT: ret %1 = tail call i256 @llvm.uadd.sat.i256(i256 %x, i256 %y) ret i256 %1 @@ -146,12 +144,10 @@ ; CHECK-NEXT: sbcs x9, x1, x5 ; CHECK-NEXT: sbcs x10, x2, x6 ; CHECK-NEXT: sbcs x11, x3, x7 -; CHECK-NEXT: cset w12, lo -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, xzr, x8, ne -; CHECK-NEXT: csel x1, xzr, x9, ne -; CHECK-NEXT: csel x2, xzr, x10, ne -; CHECK-NEXT: csel x3, xzr, x11, ne +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo +; CHECK-NEXT: csel x2, xzr, x10, lo +; CHECK-NEXT: csel x3, xzr, x11, lo ; CHECK-NEXT: ret %1 = tail call i256 @llvm.usub.sat.i256(i256 %x, i256 %y) ret i256 %1 @@ -214,14 +210,12 @@ ; CHECK-NEXT: adcs x9, x1, x5 ; CHECK-NEXT: adcs x10, x2, x6 ; CHECK-NEXT: adcs x11, x3, x7 -; CHECK-NEXT: cset w12, vs -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, x13, x8, ne -; CHECK-NEXT: eor x8, x13, #0x8000000000000000 -; CHECK-NEXT: csel x1, x13, x9, ne -; CHECK-NEXT: csel x2, x13, x10, ne -; CHECK-NEXT: csel x3, x8, x11, ne +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: csel x0, x12, x8, vs +; CHECK-NEXT: eor x8, x12, #0x8000000000000000 +; CHECK-NEXT: csel x1, x12, x9, vs +; CHECK-NEXT: csel x2, x12, x10, vs +; CHECK-NEXT: csel x3, x8, x11, vs ; CHECK-NEXT: ret %1 = tail call i256 @llvm.sadd.sat.i256(i256 %x, i256 %y) ret i256 %1 
@@ -292,14 +286,12 @@ ; CHECK-NEXT: sbcs x9, x1, x5 ; CHECK-NEXT: sbcs x10, x2, x6 ; CHECK-NEXT: sbcs x11, x3, x7 -; CHECK-NEXT: cset w12, vs -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, x13, x8, ne -; CHECK-NEXT: eor x8, x13, #0x8000000000000000 -; CHECK-NEXT: csel x1, x13, x9, ne -; CHECK-NEXT: csel x2, x13, x10, ne -; CHECK-NEXT: csel x3, x8, x11, ne +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: csel x0, x12, x8, vs +; CHECK-NEXT: eor x8, x12, #0x8000000000000000 +; CHECK-NEXT: csel x1, x12, x9, vs +; CHECK-NEXT: csel x2, x12, x10, vs +; CHECK-NEXT: csel x3, x8, x11, vs ; CHECK-NEXT: ret %1 = tail call i256 @llvm.ssub.sat.i256(i256 %x, i256 %y) ret i256 %1 diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -352,20 +352,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, x11, x8, ne -; CHECK-NEXT: eor x8, x11, #0x8000000000000000 -; CHECK-NEXT: csel x3, x8, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x2, x10, x8, vs +; CHECK-NEXT: eor x8, x10, #0x8000000000000000 +; CHECK-NEXT: csel x3, x8, x9, vs ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: eor x10, x11, #0x8000000000000000 -; CHECK-NEXT: csel x8, x11, x8, ne -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x8, x10, x8, vs +; CHECK-NEXT: eor x10, x10, #0x8000000000000000 +; CHECK-NEXT: csel x1, x10, x9, vs ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- 
a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -355,20 +355,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, x11, x8, ne -; CHECK-NEXT: eor x8, x11, #0x8000000000000000 -; CHECK-NEXT: csel x3, x8, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x2, x10, x8, vs +; CHECK-NEXT: eor x8, x10, #0x8000000000000000 +; CHECK-NEXT: csel x3, x8, x9, vs ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: eor x10, x11, #0x8000000000000000 -; CHECK-NEXT: csel x8, x11, x8, ne -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x8, x10, x8, vs +; CHECK-NEXT: eor x10, x10, #0x8000000000000000 +; CHECK-NEXT: csel x1, x10, x9, vs ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -350,16 +350,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x2, x8, xzr, eq -; CHECK-NEXT: csinv x3, x9, xzr, eq +; CHECK-NEXT: csinv x2, x8, xzr, lo +; CHECK-NEXT: csinv x3, x9, xzr, lo ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x8, x8, xzr, eq -; CHECK-NEXT: csinv x1, x9, xzr, eq +; CHECK-NEXT: csinv x8, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- 
a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -346,16 +346,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, xzr, x8, ne -; CHECK-NEXT: csel x3, xzr, x9, ne +; CHECK-NEXT: csel x2, xzr, x8, lo +; CHECK-NEXT: csel x3, xzr, x9, lo ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x8, xzr, x8, ne -; CHECK-NEXT: csel x1, xzr, x9, ne +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0