Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -402,11 +402,13 @@ setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f16, Custom); + setOperationAction(ISD::SELECT, MVT::bf16, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); + setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -603,7 +605,7 @@ if (!Subtarget->hasFullFP16()) { for (auto Op : - {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC, + {ISD::SETCC, ISD::SELECT_CC, ISD::BR_CC, ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FMA, ISD::FNEG, ISD::FABS, ISD::FCEIL, @@ -8434,7 +8436,32 @@ RHS = DAG.getConstant(0, DL, CCVal.getValueType()); CC = ISD::SETNE; } - return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); + + // If we are lowering an f16 or bf16 and we do not have fullfp16, place it in + // the hsub subregister of an f32 in order to use FCSELSrrr + if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) { + TVal = SDValue( + DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32, + DAG.getUNDEF(MVT::f32), TVal, + DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), + 0); + FVal = SDValue( + DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32, + DAG.getUNDEF(MVT::f32), FVal, + DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), + 0); + } + + SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); + + if ((Ty == MVT::f16 || Ty == MVT::bf16) && 
!Subtarget->hasFullFP16()) { + Res = SDValue( + DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, Ty, Res, + DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), + 0); + } + + return Res; } SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4205,6 +4205,10 @@ defm FCSEL : FPCondSelect<"fcsel">; +let Predicates = [HasFullFP16] in +def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)), + (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>; + // CSEL instructions providing f128 types need to be handled by a // pseudo-instruction since the eventual code will need to introduce basic // blocks and control flow. Index: llvm/test/CodeGen/AArch64/arm64-fmax.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fmax.ll +++ llvm/test/CodeGen/AArch64/arm64-fmax.ll @@ -76,13 +76,12 @@ define float @test_f16(half %in) { ; CHECK-LABEL: test_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: cset w8, lt -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: fcsel s0, s0, s1, ne -; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fcvt s1, h0 +; CHECK-NEXT: adrp x8, .LCPI5_0 +; CHECK-NEXT: ldr h2, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fcsel s0, s0, s2, lt ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: ret %cmp = fcmp nnan ult half %in, 0.000000e+00 Index: llvm/test/CodeGen/AArch64/bf16-select.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/bf16-select.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple 
aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-BASE +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-FP16 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+bf16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-BASE +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+bf16,+fullfp16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-FP16 + +define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) { +; CHECK-BASE-LABEL: test_select: +; CHECK-BASE: // %bb.0: +; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-BASE-NEXT: cmp w0, #0 +; CHECK-BASE-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-BASE-NEXT: fcsel s0, s0, s1, ne +; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-FP16-LABEL: test_select: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: cmp w0, #0 +; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-NEXT: ret + %r = select i1 %c, bfloat %a, bfloat %b + ret bfloat %r +} + +define bfloat @test_select_fcc(bfloat %a, bfloat %b, float %c, float %d) { +; CHECK-BASE-LABEL: test_select_fcc: +; CHECK-BASE: // %bb.0: +; CHECK-BASE-NEXT: fcmp s2, s3 +; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-BASE-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-BASE-NEXT: fcsel s0, s0, s1, ne +; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-FP16-LABEL: test_select_fcc: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp s2, s3 +; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-NEXT: ret + %cc = fcmp une float %c, %d + %r = select i1 %cc, bfloat %a, bfloat %b + ret bfloat %r +} + +define bfloat @test_select_icc(bfloat %a, bfloat %b, i32 %c, i32 %d) { +; CHECK-BASE-LABEL: test_select_icc: +; CHECK-BASE: // %bb.0: +; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-BASE-NEXT: cmp w0, w1 +; 
CHECK-BASE-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-BASE-NEXT: fcsel s0, s0, s1, ne +; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-FP16-LABEL: test_select_icc: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: cmp w0, w1 +; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-NEXT: ret + %cc = icmp ne i32 %c, %d + %r = select i1 %cc, bfloat %a, bfloat %b + ret bfloat %r +} Index: llvm/test/CodeGen/AArch64/f16-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/f16-instructions.ll +++ llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -167,11 +167,8 @@ } ; CHECK-CVT-LABEL: test_select: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: cmp w0, #0 ; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select: @@ -187,11 +184,8 @@ ; CHECK-CVT-LABEL: test_select_cc: ; CHECK-CVT-DAG: fcvt s3, h3 ; CHECK-CVT-DAG: fcvt s2, h2 -; CHECK-CVT-DAG: fcvt s1, h1 -; CHECK-CVT-DAG: fcvt s0, h0 ; CHECK-CVT-DAG: fcmp s2, s3 ; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select_cc: @@ -224,11 +218,8 @@ } ; CHECK-CVT-LABEL: test_select_cc_f16_f32: -; CHECK-CVT-DAG: fcvt s0, h0 -; CHECK-CVT-DAG: fcvt s1, h1 ; CHECK-CVT-DAG: fcmp s2, s3 ; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select_cc_f16_f32: @@ -485,16 +476,14 @@ } ; CHECK-COMMON-LABEL: test_fccmp: -; CHECK-CVT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmov s1, #8.00000000 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: fmov s1, #5.00000000 -; CHECK-CVT-NEXT: cset w8, gt -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w9, mi -; CHECK-CVT-NEXT: tst w8, w9 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT: fcvt s1, h0 +; CHECK-CVT-NEXT: fmov s2, #5.00000000 +; 
CHECK-CVT-NEXT: fcmp s1, s2 +; CHECK-CVT-NEXT: fmov s2, #8.00000000 +; CHECK-CVT-NEXT: adrp x8 +; CHECK-CVT-NEXT: fccmp s1, s2, #4, mi +; CHECK-CVT-NEXT: ldr h1, [x8, +; CHECK-CVT-NEXT: fcsel s0, s0, s1, gt ; CHECK-CVT-NEXT: str h0, [x0] ; CHECK-CVT-NEXT: ret ; CHECK-FP16: fmov h1, #5.00000000