diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -526,6 +526,15 @@ setOperationAction(ISD::UMULO, MVT::i32, Custom); setOperationAction(ISD::UMULO, MVT::i64, Custom); + setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); + setOperationAction(ISD::ADDCARRY, MVT::i64, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i64, Custom); + setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom); + setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom); + setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom); + setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom); + setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); @@ -3317,6 +3326,86 @@ Op.getOperand(2)); } +// Sets 'C' bit of NZCV to 0 if value is 0, else sets 'C' bit to 1 +static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG) { + SDLoc DL(Value); + SDValue One = DAG.getConstant(1, DL, Value.getValueType()); + SDValue Cmp = + DAG.getNode(AArch64ISD::SUBS, DL, + DAG.getVTList(Value.getValueType(), MVT::Glue), Value, One); + return Cmp.getValue(1); +} + +// Value is 1 if 'C' bit of NZCV is 1, else 0 +static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) { + assert(Flag.getResNo() == 1); + SDLoc DL(Flag); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue CC = DAG.getConstant(AArch64CC::HS, DL, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag); +} + +// Value is 1 if 'V' bit of NZCV is 1, else 0 +static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) { + assert(Flag.getResNo() == 1); + SDLoc DL(Flag); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag); +} + +// This lowering is inefficient, but it will get cleaned up by +// `performAddSubCombine` +static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { + EVT VT0 = Op.getValue(0).getValueType(); + EVT VT1 = Op.getValue(1).getValueType(); + + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT0)) { + return SDValue(); + } + + bool IsSigned; + unsigned Opcode; + switch (Op->getOpcode()) { + default: + return SDValue(); + case ISD::ADDCARRY: + IsSigned = false; + Opcode = AArch64ISD::ADCS; + break; + case ISD::SUBCARRY: + IsSigned = false; + Opcode = AArch64ISD::SBCS; + break; + case ISD::SADDO_CARRY: + IsSigned = true; + Opcode = AArch64ISD::ADCS; + break; + case ISD::SSUBO_CARRY: + IsSigned = true; + Opcode = AArch64ISD::SBCS; + break; + } + + SDValue OpLhs = Op.getOperand(0); + SDValue OpRhs = Op.getOperand(1); + SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG); + + SDLoc DL(Op); + SDVTList VTs = DAG.getVTList(VT0, VT1); + + SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLhs, + OpRhs, OpCarryIn); + + SDValue OutFlag = IsSigned ? 
overflowFlagToValue(Sum.getValue(1), VT1, DAG)
+                         : carryFlagToValue(Sum.getValue(1), VT1, DAG);
+
+  return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
+}
+
 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   // Let legalize expand this if it isn't a legal type yet.
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
@@ -5125,6 +5214,11 @@
   case ISD::SUBC:
   case ISD::SUBE:
     return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY:
+  case ISD::SADDO_CARRY:
+  case ISD::SSUBO_CARRY:
+    return lowerADDSUBCARRY(Op, DAG);
   case ISD::SADDO:
   case ISD::UADDO:
   case ISD::SSUBO:
@@ -15301,9 +15395,163 @@
   return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
 }
 
+static bool isCMP(SDValue Op) {
+  return Op.getOpcode() == AArch64ISD::SUBS &&
+         !Op.getNode()->hasAnyUseOfValue(0);
+}
+
+// (CSEL 1 0 CC Cond) => CC
+// (CSEL 0 1 CC Cond) => !CC
+static Optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
+  if (Op.getOpcode() == AArch64ISD::CSEL) {
+    auto CC = static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
+    if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+      return None;
+    SDValue Lhs = Op.getOperand(0);
+    SDValue Rhs = Op.getOperand(1);
+    if (isOneConstant(Lhs) && isNullConstant(Rhs)) {
+      return CC;
+    }
+    if (isNullConstant(Lhs) && isOneConstant(Rhs)) {
+      return getInvertedCondCode(CC);
+    }
+  }
+  return None;
+}
+
+// ADDS => ADD
+// SUBS => SUB
+// ADCS => ADC
+// SBCS => SBC
+// if the flag result is unused
+static SDValue removeDeadFlags(SDNode *Op, SelectionDAG &DAG) {
+  unsigned Opcode;
+  bool CarryIn;
+  switch (Op->getOpcode()) {
+  default:
+    return SDValue();
+  case AArch64ISD::ADDS:
+    Opcode = ISD::ADD;
+    CarryIn = false;
+    break;
+  case AArch64ISD::SUBS:
+    Opcode = ISD::SUB;
+    CarryIn = false;
+    break;
+  case AArch64ISD::ADCS:
+    Opcode = AArch64ISD::ADC;
+    CarryIn = true;
+    break;
+  case AArch64ISD::SBCS:
+    Opcode = AArch64ISD::SBC;
+    CarryIn = true;
+    break;
+  }
+
+  if (Op->hasAnyUseOfValue(1))
+    return SDValue();
+
+  SDLoc DL(Op);
+  EVT VT0 = Op->getValueType(0);
+  EVT VT1 = Op->getValueType(1);
+  SDVTList VTs = DAG.getVTList(VT0, VT1);
+
+  SDValue OpLhs = Op->getOperand(0);
+  SDValue OpRhs = Op->getOperand(1);
+
+  // SelectionDAG combines must return an op that outputs the same number
+  // of values as the original. This means we cannot just return `FlaglessOp`.
+  // Instead we return both `FlaglessOp` and `Flag`, and rely on
+  // SelectionDAG's dead code elimination to remove `Flag`.
+  SDValue FlaglessOp =
+      CarryIn ? 
DAG.getNode(Opcode, DL, VT0, OpLhs, OpRhs, Op->getOperand(2))
+              : DAG.getNode(Opcode, DL, VT0, OpLhs, OpRhs);
+  SDValue Flag = DAG.getUNDEF(VT1);
+
+  return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, FlaglessOp, Flag);
+}
+
+// (ADC{S} l r (CMP (CSET HS/HI carry) 1)) => (ADC{S} l r carry)
+// (SBC{S} l r (CMP (CSET LS/LO carry) 1)) => (SBC{S} l r carry)
+static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG) {
+  unsigned Opcode = Op->getOpcode();
+  bool IsAdd;
+  bool SetsFlags;
+  switch (Opcode) {
+  default:
+    return SDValue();
+  case AArch64ISD::ADC:
+    IsAdd = true;
+    SetsFlags = false;
+    break;
+  case AArch64ISD::ADCS:
+    IsAdd = true;
+    SetsFlags = true;
+    break;
+  case AArch64ISD::SBC:
+    IsAdd = false;
+    SetsFlags = false;
+    break;
+  case AArch64ISD::SBCS:
+    IsAdd = false;
+    SetsFlags = true;
+    break;
+  }
+
+  SDValue OpLhs = Op->getOperand(0);
+  SDValue OpRhs = Op->getOperand(1);
+
+  SDValue CmpOp = Op->getOperand(2);
+  if (!isCMP(CmpOp))
+    return SDValue();
+
+  auto CC = getCSETCondCode(CmpOp.getOperand(0));
+  if (!(IsAdd ? ((CC == AArch64CC::HS) || (CC == AArch64CC::HI))
+              : ((CC == AArch64CC::LO) || (CC == AArch64CC::LS))))
+    return SDValue();
+
+  if (!isOneConstant(CmpOp.getOperand(1)))
+    return SDValue();
+  SDValue CsetOp = CmpOp.getOperand(0);
+  SDValue CsetCarry = CsetOp.getOperand(CsetOp->getNumOperands() - 1);
+
+  SDVTList VTs = SetsFlags
+                     ? DAG.getVTList(Op->getValueType(0), Op->getValueType(1))
+                     : DAG.getVTList(Op->getValueType(0));
+
+  return DAG.getNode(Opcode, SDLoc(Op), VTs, OpLhs, OpRhs, CsetCarry);
+}
+
+// (ADC x 0 cond) => (CINC x HS cond)
+static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != AArch64ISD::ADC)
+    return SDValue();
+
+  SDValue Lhs = N->getOperand(0);
+  SDValue Rhs = N->getOperand(1);
+  SDValue Cond = N->getOperand(2);
+
+  if (!isNullConstant(Rhs))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // (CINC x cc cond) <=> (CSINC x x !cc cond)
+  SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32);
+  return DAG.getNode(AArch64ISD::CSINC, DL, VT, Lhs, Lhs, CC, Cond);
+}
+
 static SDValue performAddSubCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
+  if (SDValue Val = removeDeadFlags(N, DAG))
+    return Val;
+
+  if (SDValue Val = foldOverflowCheck(N, DAG))
+    return Val;
+
+  if (SDValue Val = foldADCToCINC(N, DAG))
+    return Val;
+
   // Try to change sum of two reductions.
if (SDValue Val = performAddUADDVCombine(N, DAG)) return Val; @@ -18506,6 +18754,12 @@ break; case ISD::ADD: case ISD::SUB: + case AArch64ISD::ADDS: + case AArch64ISD::SUBS: + case AArch64ISD::ADC: + case AArch64ISD::ADCS: + case AArch64ISD::SBC: + case AArch64ISD::SBCS: return performAddSubCombine(N, DCI, DAG); case ISD::XOR: return performXorCombine(N, DAG, DCI, Subtarget); diff --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll --- a/llvm/test/CodeGen/AArch64/adc.ll +++ b/llvm/test/CodeGen/AArch64/adc.ll @@ -1,65 +1,96 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s define i128 @test_simple(i128 %a, i128 %b, i128 %c) { -; CHECK-LABEL: test_simple: +; CHECK-LE-LABEL: test_simple: +; CHECK-LE: ; %bb.0: +; CHECK-LE-NEXT: adds x8, x0, x2 +; CHECK-LE-NEXT: adc x9, x1, x3 +; CHECK-LE-NEXT: subs x0, x8, x4 +; CHECK-LE-NEXT: sbc x1, x9, x5 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_simple: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: adds x8, x1, x3 +; CHECK-BE-NEXT: adc x9, x0, x2 +; CHECK-BE-NEXT: subs x1, x8, x5 +; CHECK-BE-NEXT: sbc x0, x9, x4 +; CHECK-BE-NEXT: ret %valadd = add i128 %a, %b -; CHECK-LE: adds [[ADDLO:x[0-9]+]], x0, x2 -; CHECK-LE-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3 -; CHECK-BE: adds [[ADDLO:x[0-9]+]], x1, x3 -; CHECK-BE-NEXT: adcs [[ADDHI:x[0-9]+]], x0, x2 %valsub = sub i128 %valadd, %c -; CHECK-LE: subs x0, [[ADDLO]], x4 -; CHECK-LE: sbcs x1, [[ADDHI]], x5 -; CHECK-BE: subs x1, [[ADDLO]], x5 -; CHECK-BE: sbcs x0, [[ADDHI]], x4 ret i128 %valsub -; CHECK: ret } define i128 @test_imm(i128 %a) { -; CHECK-LABEL: test_imm: +; CHECK-LE-LABEL: test_imm: +; CHECK-LE: ; %bb.0: +; CHECK-LE-NEXT: adds x0, x0, #12 +; CHECK-LE-NEXT: cinc x1, x1, hs +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_imm: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: adds x1, x1, #12 +; CHECK-BE-NEXT: cinc x0, x0, hs +; CHECK-BE-NEXT: ret %val = add i128 %a, 12 -; CHECK-LE: adds x0, x0, #12 -; CHECK-LE: adcs x1, x1, {{x[0-9]|xzr}} -; CHECK-BE: adds x1, x1, #12 -; CHECK-BE: adcs x0, x0, {{x[0-9]|xzr}} ret i128 %val -; CHECK: ret } define i128 @test_shifted(i128 %a, i128 %b) { -; CHECK-LABEL: test_shifted: +; CHECK-LE-LABEL: test_shifted: +; CHECK-LE: ; %bb.0: +; CHECK-LE-NEXT: extr x8, x3, x2, #19 +; CHECK-LE-NEXT: adds x0, x0, x2, lsl #45 +; CHECK-LE-NEXT: adc x1, x1, x8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_shifted: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: extr x8, x2, x3, #19 +; CHECK-BE-NEXT: adds x1, x1, x3, lsl #45 +; CHECK-BE-NEXT: adc x0, x0, x8 +; CHECK-BE-NEXT: ret %rhs = shl i128 %b, 45 %val = add i128 %a, %rhs -; CHECK-LE: adds x0, x0, x2, lsl #45 -; CHECK-LE: adcs x1, x1, {{x[0-9]}} -; CHECK-BE: adds x1, x1, x3, lsl #45 -; CHECK-BE: adcs x0, x0, {{x[0-9]}} ret i128 %val -; CHECK: ret } define i128 @test_extended(i128 %a, i16 %b) { -; CHECK-LABEL: test_extended: +; CHECK-LE-LABEL: test_extended: +; CHECK-LE: ; %bb.0: +; CHECK-LE-NEXT: ; kill: def $w2 killed $w2 def $x2 +; CHECK-LE-NEXT: sxth x8, w2 +; CHECK-LE-NEXT: adds x0, x0, w2, sxth #3 +; CHECK-LE-NEXT: 
asr x9, x8, #63 +; CHECK-LE-NEXT: extr x8, x9, x8, #61 +; CHECK-LE-NEXT: adc x1, x1, x8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: test_extended: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-BE-NEXT: sxth x8, w2 +; CHECK-BE-NEXT: adds x1, x1, w2, sxth #3 +; CHECK-BE-NEXT: asr x9, x8, #63 +; CHECK-BE-NEXT: extr x8, x9, x8, #61 +; CHECK-BE-NEXT: adc x0, x0, x8 +; CHECK-BE-NEXT: ret %ext = sext i16 %b to i128 %rhs = shl i128 %ext, 3 %val = add i128 %a, %rhs -; CHECK-LE: adds x0, x0, w2, sxth #3 -; CHECK-LE: adcs x1, x1, {{x[0-9]}} -; CHECK-BE: adds x1, x1, w2, sxth #3 -; CHECK-BE: adcs x0, x0, {{x[0-9]}} ret i128 %val -; CHECK: ret } diff --git a/llvm/test/CodeGen/AArch64/addcarry-crash.ll b/llvm/test/CodeGen/AArch64/addcarry-crash.ll --- a/llvm/test/CodeGen/AArch64/addcarry-crash.ll +++ b/llvm/test/CodeGen/AArch64/addcarry-crash.ll @@ -1,13 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s target triple = "arm64-apple-ios7.0" define i64 @foo(i64* nocapture readonly %ptr, i64 %a, i64 %b, i64 %c) local_unnamed_addr #0 { -; CHECK: ldr w8, [x0, #4] -; CHECK: lsr x9, x1, #32 -; CHECK: cmn x3, x2 -; CHECK: mul x8, x8, x9 -; CHECK: cinc x0, x8, hs -; CHECK: ret +; CHECK-LABEL: foo: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ldr w8, [x0, #4] +; CHECK-NEXT: lsr x9, x1, #32 +; CHECK-NEXT: cmn x3, x2 +; CHECK-NEXT: mul x8, x8, x9 +; CHECK-NEXT: cinc x0, x8, hs +; CHECK-NEXT: ret entry: %0 = lshr i64 %a, 32 %1 = load i64, i64* %ptr, align 8 diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -260,7 +260,7 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxp x9, x8, [x0] ; CHECK-NEXT: adds x10, x9, x2 -; CHECK-NEXT: adcs x11, x8, x3 +; CHECK-NEXT: adc x11, x8, x3 ; CHECK-NEXT: stlxp w12, x10, x11, [x0] ; CHECK-NEXT: cbnz w12, .LBB6_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end @@ -281,7 +281,7 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxp x9, x8, [x0] ; CHECK-NEXT: subs x10, x9, x2 -; CHECK-NEXT: sbcs x11, x8, x3 +; CHECK-NEXT: sbc x11, x8, x3 ; CHECK-NEXT: stlxp w12, x10, x11, [x0] ; CHECK-NEXT: cbnz w12, .LBB7_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -1748,28 +1748,28 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: uabd_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: fmov x11, d1 ; CHECK-NEXT: mov.d x8, v0[1] +; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: mov.d x10, v1[1] +; CHECK-NEXT: fmov x11, d1 ; CHECK-NEXT: asr x12, x9, #63 ; CHECK-NEXT: asr x13, x11, #63 ; CHECK-NEXT: subs x9, x9, x11 -; CHECK-NEXT: sbcs x11, x12, x13 +; CHECK-NEXT: sbc x11, x12, x13 ; CHECK-NEXT: asr x12, x8, #63 ; CHECK-NEXT: asr x13, x10, #63 ; CHECK-NEXT: subs x8, x8, x10 -; CHECK-NEXT: sbcs x10, x12, x13 -; CHECK-NEXT: negs x12, x8 -; CHECK-NEXT: ngcs x13, x10 -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: csel x2, x12, x8, lt -; CHECK-NEXT: csel x3, x13, x10, lt -; CHECK-NEXT: negs x8, x9 -; CHECK-NEXT: ngcs x10, x11 -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: csel x1, x10, x11, lt +; CHECK-NEXT: sbc x10, x12, x13 +; CHECK-NEXT: asr x12, x11, #63 +; 
CHECK-NEXT: asr x13, x10, #63 +; CHECK-NEXT: eor x9, x9, x12 +; CHECK-NEXT: eor x8, x8, x13 +; CHECK-NEXT: eor x10, x10, x13 +; CHECK-NEXT: subs x2, x8, x13 +; CHECK-NEXT: sbc x3, x10, x13 +; CHECK-NEXT: subs x8, x9, x12 +; CHECK-NEXT: eor x9, x11, x12 +; CHECK-NEXT: sbc x1, x9, x12 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll --- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll @@ -219,8 +219,7 @@ ; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload ; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload ; NOLSE-NEXT: adds x14, x8, #1 -; NOLSE-NEXT: mov x9, xzr -; NOLSE-NEXT: adcs x15, x11, x9 +; NOLSE-NEXT: cinc x15, x11, hs ; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start ; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 ; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 @@ -271,14 +270,13 @@ ; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload ; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload ; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload -; LSE-NEXT: adds x2, x8, #1 -; LSE-NEXT: mov x11, xzr -; LSE-NEXT: adcs x11, x10, x11 -; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 -; LSE-NEXT: mov x3, x11 ; LSE-NEXT: mov x0, x8 ; LSE-NEXT: mov x1, x10 ; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: adds x2, x8, #1 +; LSE-NEXT: cinc x11, x10, hs +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov x3, x11 ; LSE-NEXT: caspal x0, x1, x2, x3, [x9] ; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill ; LSE-NEXT: mov x9, x1 diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll --- a/llvm/test/CodeGen/AArch64/i128-math.ll +++ b/llvm/test/CodeGen/AArch64/i128-math.ll @@ -23,7 +23,7 @@ ; CHECK-LABEL: u128_add: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x0, x0, x2 -; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: adc x1, x1, x3 ; CHECK-NEXT: ret %1 = add i128 %x, %y ret i128 %1 @@ -32,16 +32,10 @@ define { i128, i8 } @u128_checked_add(i128 %x, i128 %y) { ; CHECK-LABEL: u128_checked_add: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x2 -; CHECK-NEXT: adcs x9, x1, x3 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, lo -; CHECK-NEXT: mov x1, x9 -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: eor w2, w10, #0x1 +; CHECK-NEXT: adds x0, x0, x2 +; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: cset w8, hs +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -56,15 +50,9 @@ define { i128, i8 } @u128_overflowing_add(i128 %x, i128 %y) { ; CHECK-LABEL: u128_overflowing_add: ; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x2 -; CHECK-NEXT: adcs x9, x1, x3 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, lo -; CHECK-NEXT: mov x1, x9 -; CHECK-NEXT: csel w2, w10, w11, eq +; CHECK-NEXT: adds x0, x0, x2 +; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: cset w2, hs ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -80,11 +68,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x0, x2 ; CHECK-NEXT: adcs x9, x1, x3 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, lo -; 
CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: csinv x0, x8, xzr, eq ; CHECK-NEXT: csinv x1, x9, xzr, eq @@ -97,7 +81,7 @@ ; CHECK-LABEL: u128_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x0, x0, x2 -; CHECK-NEXT: sbcs x1, x1, x3 +; CHECK-NEXT: sbc x1, x1, x3 ; CHECK-NEXT: ret %1 = sub i128 %x, %y ret i128 %1 @@ -106,16 +90,10 @@ define { i128, i8 } @u128_checked_sub(i128 %x, i128 %y) { ; CHECK-LABEL: u128_checked_sub: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x2 -; CHECK-NEXT: sbcs x9, x1, x3 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, hi -; CHECK-NEXT: mov x1, x9 -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: eor w2, w10, #0x1 +; CHECK-NEXT: subs x0, x0, x2 +; CHECK-NEXT: sbcs x1, x1, x3 +; CHECK-NEXT: cset w8, hs +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.usub.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -130,15 +108,9 @@ define { i128, i8 } @u128_overflowing_sub(i128 %x, i128 %y) { ; CHECK-LABEL: u128_overflowing_sub: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x2 -; CHECK-NEXT: sbcs x9, x1, x3 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, hi -; CHECK-NEXT: mov x1, x9 -; CHECK-NEXT: csel w2, w10, w11, eq +; CHECK-NEXT: subs x0, x0, x2 +; CHECK-NEXT: sbcs x1, x1, x3 +; CHECK-NEXT: cset w2, hs ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.usub.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -154,11 +126,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbcs x9, x1, x3 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, hi -; CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: csel x0, xzr, x8, ne ; CHECK-NEXT: csel x1, xzr, x9, ne @@ -171,7 +139,7 @@ ; CHECK-LABEL: i128_add: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x0, x0, x2 -; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: adc x1, x1, x3 ; CHECK-NEXT: ret %1 = add i128 %x, %y ret i128 %1 @@ -181,12 +149,9 @@ ; CHECK-LABEL: i128_checked_add: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x0, x0, x2 -; CHECK-NEXT: eor x9, x1, x3 -; CHECK-NEXT: adcs x8, x1, x3 -; CHECK-NEXT: eor x10, x1, x8 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: bics xzr, x10, x9 -; CHECK-NEXT: cset w2, ge +; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: cset w8, vs +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -202,12 +167,8 @@ ; CHECK-LABEL: i128_overflowing_add: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x0, x0, x2 -; CHECK-NEXT: eor x9, x1, x3 -; CHECK-NEXT: adcs x8, x1, x3 -; CHECK-NEXT: eor x10, x1, x8 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: bics xzr, x10, x9 -; CHECK-NEXT: cset w2, lt +; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: cset w2, vs ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.sadd.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -222,14 +183,13 @@ ; CHECK-LABEL: i128_saturating_add: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x0, x2 -; CHECK-NEXT: eor x11, x1, x3 ; CHECK-NEXT: adcs x9, x1, x3 ; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x12, x1, x9 -; CHECK-NEXT: bics xzr, x12, x11 +; CHECK-NEXT: cset w11, vs +; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: eor x11, 
x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, lt -; CHECK-NEXT: csel x1, x11, x9, lt +; CHECK-NEXT: csel x0, x10, x8, ne +; CHECK-NEXT: csel x1, x11, x9, ne ; CHECK-NEXT: ret %1 = tail call i128 @llvm.sadd.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -239,7 +199,7 @@ ; CHECK-LABEL: i128_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x0, x0, x2 -; CHECK-NEXT: sbcs x1, x1, x3 +; CHECK-NEXT: sbc x1, x1, x3 ; CHECK-NEXT: ret %1 = sub i128 %x, %y ret i128 %1 @@ -249,12 +209,9 @@ ; CHECK-LABEL: i128_checked_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x0, x0, x2 -; CHECK-NEXT: eor x9, x1, x3 -; CHECK-NEXT: sbcs x8, x1, x3 -; CHECK-NEXT: eor x10, x1, x8 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: tst x9, x10 -; CHECK-NEXT: cset w2, ge +; CHECK-NEXT: sbcs x1, x1, x3 +; CHECK-NEXT: cset w8, vs +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.ssub.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -270,12 +227,8 @@ ; CHECK-LABEL: i128_overflowing_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x0, x0, x2 -; CHECK-NEXT: eor x9, x1, x3 -; CHECK-NEXT: sbcs x8, x1, x3 -; CHECK-NEXT: eor x10, x1, x8 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: tst x9, x10 -; CHECK-NEXT: cset w2, lt +; CHECK-NEXT: sbcs x1, x1, x3 +; CHECK-NEXT: cset w2, vs ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.ssub.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -290,14 +243,13 @@ ; CHECK-LABEL: i128_saturating_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x2 -; CHECK-NEXT: eor x11, x1, x3 ; CHECK-NEXT: sbcs x9, x1, x3 ; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x12, x1, x9 -; CHECK-NEXT: tst x11, x12 +; CHECK-NEXT: cset w11, vs +; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, lt -; CHECK-NEXT: csel x1, x11, x9, lt +; CHECK-NEXT: csel x0, x10, x8, ne +; CHECK-NEXT: csel x1, x11, x9, ne ; CHECK-NEXT: ret %1 = tail call i128 @llvm.ssub.sat.i128(i128 %x, i128 %y) ret i128 %1 diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll --- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: .LBB0_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds x0, x0, #1 -; CHECK-NEXT: adcs x1, x1, xzr +; CHECK-NEXT: cinc x1, x1, hs ; CHECK-NEXT: orr x8, x1, x0, lsr #60 ; CHECK-NEXT: cbnz x8, .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit diff --git a/llvm/test/CodeGen/AArch64/neg-abs.ll b/llvm/test/CodeGen/AArch64/neg-abs.ll --- a/llvm/test/CodeGen/AArch64/neg-abs.ll +++ b/llvm/test/CodeGen/AArch64/neg-abs.ll @@ -52,7 +52,7 @@ ; CHECK-NEXT: eor x9, x0, x8 ; CHECK-NEXT: eor x10, x1, x8 ; CHECK-NEXT: subs x0, x8, x9 -; CHECK-NEXT: sbcs x1, x8, x10 +; CHECK-NEXT: sbc x1, x8, x10 ; CHECK-NEXT: ret %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) %neg = sub nsw i128 0, %abs @@ -95,11 +95,11 @@ define i128 @abs128(i128 %x) { ; CHECK-LABEL: abs128: ; CHECK: // %bb.0: -; CHECK-NEXT: negs x8, x0 -; CHECK-NEXT: ngcs x9, x1 -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x0, x8, x0, lt -; CHECK-NEXT: csel x1, x9, x1, lt +; CHECK-NEXT: asr x8, x1, #63 +; CHECK-NEXT: eor x9, x0, x8 +; CHECK-NEXT: eor x10, x1, x8 +; CHECK-NEXT: subs x0, x9, x8 +; CHECK-NEXT: sbc x1, x10, x8 ; CHECK-NEXT: ret %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) ret i128 %abs diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll --- 
a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -147,21 +147,23 @@ ; CHECK-NEXT: mov x8, v0.d[1] ; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x9, v1.d[1] -; CHECK-NEXT: asr x11, x10, #63 -; CHECK-NEXT: asr x12, x8, #63 -; CHECK-NEXT: asr x13, x9, #63 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: asr x14, x8, #63 +; CHECK-NEXT: asr x15, x9, #63 ; CHECK-NEXT: subs x8, x8, x9 -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: sbcs x12, x12, x13 -; CHECK-NEXT: asr x13, x9, #63 -; CHECK-NEXT: subs x9, x10, x9 -; CHECK-NEXT: sbcs x10, x11, x13 -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cneg x9, x9, lt -; CHECK-NEXT: cmp x12, #0 -; CHECK-NEXT: cneg x8, x8, lt -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: asr x13, x11, #63 +; CHECK-NEXT: sbc x9, x14, x15 +; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: asr x9, x9, #63 +; CHECK-NEXT: sbc x11, x12, x13 +; CHECK-NEXT: eor x8, x8, x9 +; CHECK-NEXT: asr x11, x11, #63 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: eor x10, x10, x11 +; CHECK-NEXT: sub x10, x10, x11 ; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret %a.sext = sext <2 x i64> %a to <2 x i128> @@ -325,17 +327,19 @@ ; CHECK-NEXT: mov x8, v0.d[1] ; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x11, d1 ; CHECK-NEXT: subs x8, x8, x9 -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: ngcs x11, xzr -; CHECK-NEXT: subs x9, x10, x9 -; CHECK-NEXT: ngcs x10, xzr -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cneg x9, x9, lt -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cneg x8, x8, lt -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: ngc x9, xzr +; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: ngc x11, xzr +; CHECK-NEXT: asr x9, x9, #63 +; CHECK-NEXT: asr x11, x11, #63 +; CHECK-NEXT: eor x8, x8, x9 +; CHECK-NEXT: eor x10, x10, x11 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: sub x10, x10, x11 ; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret %a.zext = zext <2 x i64> %a to <2 x i128> diff --git a/llvm/test/CodeGen/AArch64/nzcv-save.ll b/llvm/test/CodeGen/AArch64/nzcv-save.ll --- a/llvm/test/CodeGen/AArch64/nzcv-save.ll +++ b/llvm/test/CodeGen/AArch64/nzcv-save.ll @@ -1,11 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-eabi | FileCheck %s -; CHECK: mrs [[NZCV_SAVE:x[0-9]+]], NZCV -; CHECK: msr NZCV, [[NZCV_SAVE]] - ; DAG ends up with two uses for the flags from an ADCS node, which means they ; must be saved for later. 
define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp { +; CHECK-LABEL: f: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x9, x8, [x2] +; CHECK-NEXT: ldp x11, x10, [x3] +; CHECK-NEXT: ldp x13, x12, [x2, #16] +; CHECK-NEXT: ldp x14, x15, [x3, #16] +; CHECK-NEXT: adds x9, x9, x11 +; CHECK-NEXT: adcs x8, x8, x10 +; CHECK-NEXT: adcs x10, x13, x14 +; CHECK-NEXT: adc x11, x12, x15 +; CHECK-NEXT: orr x12, x12, #0x100 +; CHECK-NEXT: adc x12, x12, x15 +; CHECK-NEXT: stp x9, x8, [x0] +; CHECK-NEXT: stp x10, x11, [x0, #16] +; CHECK-NEXT: stp x10, x12, [x1, #16] +; CHECK-NEXT: stp x9, x8, [x1] +; CHECK-NEXT: ret entry: %c = load i256, i256* %cc %d = load i256, i256* %dd diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -351,23 +351,21 @@ ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 -; CHECK-NEXT: eor x10, x3, x7 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: eor x11, x3, x9 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: bics xzr, x11, x10 -; CHECK-NEXT: eor x10, x1, x5 -; CHECK-NEXT: csel x2, x12, x8, lt -; CHECK-NEXT: eor x8, x12, #0x8000000000000000 -; CHECK-NEXT: csel x3, x8, x9, lt +; CHECK-NEXT: cset w10, vs +; CHECK-NEXT: asr x11, x9, #63 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: csel x2, x11, x8, ne +; CHECK-NEXT: eor x8, x11, #0x8000000000000000 +; CHECK-NEXT: csel x3, x8, x9, ne ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: eor x11, x1, x9 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: bics xzr, x11, x10 -; CHECK-NEXT: eor x10, x12, #0x8000000000000000 -; CHECK-NEXT: csel x8, x12, x8, lt -; CHECK-NEXT: csel x1, x10, x9, lt +; CHECK-NEXT: cset w10, vs +; CHECK-NEXT: asr x11, x9, #63 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: eor x10, x11, #0x8000000000000000 +; CHECK-NEXT: csel x8, x11, x8, ne +; CHECK-NEXT: csel x1, x10, x9, ne ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -354,23 +354,21 @@ ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 -; CHECK-NEXT: eor x10, x3, x7 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: eor x11, x3, x9 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: tst x10, x11 -; CHECK-NEXT: eor x10, x1, x5 -; CHECK-NEXT: csel x2, x12, x8, lt -; CHECK-NEXT: eor x8, x12, #0x8000000000000000 -; CHECK-NEXT: csel x3, x8, x9, lt +; CHECK-NEXT: cset w10, vs +; CHECK-NEXT: asr x11, x9, #63 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: csel x2, x11, x8, ne +; CHECK-NEXT: eor x8, x11, #0x8000000000000000 +; CHECK-NEXT: csel x3, x8, x9, ne ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: eor x11, x1, x9 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: tst x10, x11 -; CHECK-NEXT: eor x10, x12, #0x8000000000000000 -; CHECK-NEXT: csel x8, x12, x8, lt -; CHECK-NEXT: csel x1, x10, x9, lt +; CHECK-NEXT: cset w10, vs +; CHECK-NEXT: asr x11, x9, #63 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: eor x10, x11, #0x8000000000000000 +; CHECK-NEXT: csel x8, x11, x8, ne +; CHECK-NEXT: csel x1, x10, x9, ne ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll 
b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -350,21 +350,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cmp x8, x2 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, x3 -; CHECK-NEXT: cset w11, lo -; CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x3, x9, xzr, eq ; CHECK-NEXT: csinv x2, x8, xzr, eq +; CHECK-NEXT: csinv x3, x9, xzr, eq ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, lo -; CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: csinv x8, x8, xzr, eq ; CHECK-NEXT: csinv x1, x9, xzr, eq diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -346,21 +346,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: cmp x8, x2 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x9, x3 -; CHECK-NEXT: cset w11, hi -; CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x3, xzr, x9, ne ; CHECK-NEXT: csel x2, xzr, x8, ne +; CHECK-NEXT: csel x3, xzr, x9, ne ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: cset w11, hi -; CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: csel x8, xzr, x8, ne ; CHECK-NEXT: csel x1, xzr, x9, ne diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -278,18 +278,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cmp x8, x2 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, x3 -; CHECK-NEXT: cset w11, lo -; CHECK-NEXT: csel w10, w10, w11, eq +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: adds x11, x0, x4 ; CHECK-NEXT: adcs x12, x1, x5 -; CHECK-NEXT: cmp x11, x0 -; CHECK-NEXT: cset w13, lo -; CHECK-NEXT: cmp x12, x1 -; CHECK-NEXT: cset w14, lo -; CHECK-NEXT: csel w13, w13, w14, eq +; CHECK-NEXT: cset w13, hs ; CHECK-NEXT: fmov s0, w13 ; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: ldr x10, [sp] diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll @@ -149,7 +149,7 @@ ; CHECK-LABEL: test_v2i128: ; CHECK: // %bb.0: ; CHECK-NEXT: adds x0, x0, x2 -; CHECK-NEXT: adcs x1, x1, x3 +; CHECK-NEXT: adc x1, x1, x3 ; CHECK-NEXT: ret %b = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a) ret i128 %b
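---

Note for reviewers: a minimal end-to-end sketch of what the new lowering buys. This is illustrative only, not a test added by the patch; the function name is made up, and the expected sequence is taken from the updated `u128_overflowing_add` checks in `i128-math.ll` above rather than re-run independently.

```llvm
declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128)

define { i128, i1 } @overflowing_add(i128 %x, i128 %y) {
  ; The i128 halves arrive in x0/x1 and x2/x3. ADDCARRY now lowers to ADCS,
  ; and foldOverflowCheck threads NZCV straight from the low ADDS into the
  ; high ADCS, so the carry-out becomes a single cset on the C flag instead
  ; of the old cmp/cset/csel sequence:
  ;   adds x0, x0, x2
  ;   adcs x1, x1, x3
  ;   cset w2, hs
  %r = call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %x, i128 %y)
  ret { i128, i1 } %r
}
```

Wider chained additions (i256 and up, as in `nzcv-save.ll`) thread the flags the same way, and `removeDeadFlags` demotes any ADCS/SBCS whose flag result ends up unused to plain ADC/SBC, which is where the `adcs`-to-`adc` diffs in the atomic and vector tests come from.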