diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -987,6 +987,7 @@ SelectionDAG &DAG) const; SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -414,6 +414,8 @@ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); + setOperationAction(ISD::SETCCCARRY, MVT::i64, Custom); setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); @@ -5553,6 +5555,8 @@ case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG); + case ISD::SETCCCARRY: + return LowerSETCCCARRY(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: @@ -8543,6 +8547,36 @@ return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res; } +SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op, + SelectionDAG &DAG) const { + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + EVT VT = LHS.getValueType(); + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDLoc DL(Op); + SDValue Carry = Op.getOperand(2); + // SBCS uses a carry not a borrow so the carry flag should be inverted first. 
+ SDValue InvCarry = valueToCarryFlag(Carry, DAG, true); + SDValue Cmp = DAG.getNode(AArch64ISD::SBCS, DL, DAG.getVTList(VT, MVT::Glue), + LHS, RHS, InvCarry); + + EVT OpVT = Op.getValueType(); + SDValue TVal = DAG.getConstant(1, DL, OpVT); + SDValue FVal = DAG.getConstant(0, DL, OpVT); + + ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get(); + ISD::CondCode CondInv = ISD::getSetCCInverse(Cond, VT); + SDValue CCVal = + DAG.getConstant(changeIntCCToAArch64CC(CondInv), DL, MVT::i32); + // Inputs are swapped because the condition is inverted. This will allow + // matching with a single CSINC instruction. + return DAG.getNode(AArch64ISD::CSEL, DL, OpVT, FVal, TVal, CCVal, + Cmp.getValue(1)); +} + SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, const SDLoc &dl, diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -301,14 +301,10 @@ ; CHECK-NEXT: .LBB8_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxp x9, x8, [x0] -; CHECK-NEXT: cmp x9, x2 -; CHECK-NEXT: cset w10, ls -; CHECK-NEXT: cmp x8, x3 -; CHECK-NEXT: cset w11, le -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x10, x8, x3, ne -; CHECK-NEXT: csel x11, x9, x2, ne +; CHECK-NEXT: cmp x2, x9 +; CHECK-NEXT: sbcs xzr, x3, x8 +; CHECK-NEXT: csel x10, x8, x3, ge +; CHECK-NEXT: csel x11, x9, x2, ge ; CHECK-NEXT: stlxp w12, x11, x10, [x0] ; CHECK-NEXT: cbnz w12, .LBB8_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end @@ -328,14 +324,10 @@ ; CHECK-NEXT: .LBB9_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxp x9, x8, [x0] -; CHECK-NEXT: cmp x9, x2 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x8, x3 -; CHECK-NEXT: cset w11, gt -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp 
w10, #0 -; CHECK-NEXT: csel x10, x8, x3, ne -; CHECK-NEXT: csel x11, x9, x2, ne +; CHECK-NEXT: cmp x2, x9 +; CHECK-NEXT: sbcs xzr, x3, x8 +; CHECK-NEXT: csel x10, x8, x3, lt +; CHECK-NEXT: csel x11, x9, x2, lt ; CHECK-NEXT: stlxp w12, x11, x10, [x0] ; CHECK-NEXT: cbnz w12, .LBB9_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end @@ -355,14 +347,10 @@ ; CHECK-NEXT: .LBB10_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxp x9, x8, [x0] -; CHECK-NEXT: cmp x9, x2 -; CHECK-NEXT: cset w10, ls -; CHECK-NEXT: cmp x8, x3 -; CHECK-NEXT: cset w11, ls -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x10, x8, x3, ne -; CHECK-NEXT: csel x11, x9, x2, ne +; CHECK-NEXT: cmp x2, x9 +; CHECK-NEXT: sbcs xzr, x3, x8 +; CHECK-NEXT: csel x10, x8, x3, hs +; CHECK-NEXT: csel x11, x9, x2, hs ; CHECK-NEXT: stlxp w12, x11, x10, [x0] ; CHECK-NEXT: cbnz w12, .LBB10_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end @@ -382,14 +370,10 @@ ; CHECK-NEXT: .LBB11_1: // %atomicrmw.start ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldaxp x9, x8, [x0] -; CHECK-NEXT: cmp x9, x2 -; CHECK-NEXT: cset w10, hi -; CHECK-NEXT: cmp x8, x3 -; CHECK-NEXT: cset w11, hi -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x10, x8, x3, ne -; CHECK-NEXT: csel x11, x9, x2, ne +; CHECK-NEXT: cmp x2, x9 +; CHECK-NEXT: sbcs xzr, x3, x8 +; CHECK-NEXT: csel x10, x8, x3, lo +; CHECK-NEXT: csel x11, x9, x2, lo ; CHECK-NEXT: stlxp w12, x11, x10, [x0] ; CHECK-NEXT: cbnz w12, .LBB11_1 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -398,13 +398,9 @@ ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: cmp x9, #0 -; 
CHECK-NEXT: cset w9, gt -; CHECK-NEXT: csel w9, w10, w9, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x0, x8, xzr, ne +; CHECK-NEXT: cmp xzr, x8 +; CHECK-NEXT: ngcs xzr, x9 +; CHECK-NEXT: csel x0, x8, xzr, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -461,13 +457,9 @@ ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w9, gt -; CHECK-NEXT: csel w9, w10, w9, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x0, x8, xzr, ne +; CHECK-NEXT: cmp xzr, x8 +; CHECK-NEXT: ngcs xzr, x9 +; CHECK-NEXT: csel x0, x8, xzr, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -530,13 +522,9 @@ ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w9, gt -; CHECK-NEXT: csel w9, w10, w9, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x0, x8, xzr, ne +; CHECK-NEXT: cmp xzr, x8 +; CHECK-NEXT: ngcs xzr, x9 +; CHECK-NEXT: csel x0, x8, xzr, lt ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -397,12 +397,12 @@ ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp x1, #1 ; 
CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload @@ -411,23 +411,15 @@ ; CHECK-NEXT: cmp x20, #1 ; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cset w11, gt -; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w9, gt -; CHECK-NEXT: csel w9, w12, w9, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x8, x8, xzr, ne -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: cmp xzr, x10 +; CHECK-NEXT: ngcs xzr, x11 +; CHECK-NEXT: csel x10, x10, xzr, lt +; CHECK-NEXT: cmp xzr, x8 +; CHECK-NEXT: ngcs xzr, x9 +; CHECK-NEXT: csel x8, x8, xzr, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -511,12 +503,12 @@ ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload @@ -525,23 +517,15 @@ ; CHECK-NEXT: cmp x20, #1 ; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cset w11, gt -; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: 
csel w8, w12, w8, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x8, x9, xzr, ne -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: cmp xzr, x10 +; CHECK-NEXT: ngcs xzr, x11 +; CHECK-NEXT: csel x10, x10, xzr, lt +; CHECK-NEXT: cmp xzr, x9 +; CHECK-NEXT: ngcs xzr, x8 +; CHECK-NEXT: csel x8, x9, xzr, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -637,12 +621,12 @@ ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload @@ -651,23 +635,15 @@ ; CHECK-NEXT: cmp x20, #1 ; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cset w11, gt -; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: csel w8, w12, w8, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x8, x9, xzr, ne -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: cmp xzr, x10 +; CHECK-NEXT: ngcs xzr, x11 +; CHECK-NEXT: csel x10, x10, xzr, lt +; CHECK-NEXT: cmp xzr, x9 +; CHECK-NEXT: ngcs xzr, x8 +; CHECK-NEXT: csel x8, x9, xzr, lt ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 
+; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret