Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -1047,16 +1047,19 @@ setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); setOperationAction(ISD::SETCC, MVT::i32, Expand); - setOperationAction(ISD::SETCC, MVT::f16, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::SETCC, MVT::f16, Expand); + setOperationAction(ISD::SELECT, MVT::f16, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); + } // Thumb-1 cannot currently select ARMISD::SUBE. if (!Subtarget->isThumb1Only()) @@ -1064,7 +1067,8 @@ setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::f16, Custom); + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::BR_CC, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -4522,7 +4526,9 @@ // Normalize the fp compare. If RHS is zero we keep it there so we match // CMPFPw0 instead of CMPFP. if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) && - (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { + (TrueVal.getValueType() == MVT::f16 || + TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); Index: llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll +++ llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll @@ -4,11 +4,11 @@ ; SOFTFP: ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3 -; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16 +; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3 -; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16 +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16 ; Test fast-isel @@ -703,37 +703,167 @@ ret half %2 ; CHECK-LABEL: select_cc1: + +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32: it eq +; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}} } +; FIXME: more tests need to be added for VSELGE and VSELGT. +; That is, more combinations of immediate operands that can or can't +; be encoded as an FP16 immediate need to be added here. +; ; 36. VSELGE -define half @select_cc2() { +define half @select_cc_ge1() { %1 = fcmp nsz oge half undef, 0xH0001 %2 = select i1 %1, half 0xHC000, half 0xH0002 ret half %2 -; CHECK-LABEL: select_cc2: -; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}} +; CHECK-LABEL: select_cc_ge1: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it ge +; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}} +} + +; +; FIXME: add fcmp ole, ult here. +; + +define half @select_cc_ge3() { + %1 = fcmp nsz ugt half undef, 0xH0001 + %2 = select i1 %1, half 0xHC000, half 0xH0002 + ret half %2 + +; CHECK-LABEL: select_cc_ge3: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it hi +; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}} } ; 37. VSELGT -define half @select_cc3() { +define half @select_cc_gt1() { %1 = fcmp nsz ogt half undef, 0xH0001 %2 = select i1 %1, half 0xHC000, half 0xH0002 ret half %2 -; CHECK-LABEL: select_cc3: -; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}} +; CHECK-LABEL: select_cc_gt1: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it gt +; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}} } -; 38. VSELVS -define half @select_cc4() { - %1 = fcmp nsz ueq half undef, 0xH0001 +define half @select_cc_gt2() { + %1 = fcmp nsz uge half undef, 0xH0001 %2 = select i1 %1, half 0xHC000, half 0xH0002 ret half %2 +; CHECK-LABEL: select_cc_gt2: + +; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0 +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}} + +; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0 +; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32-NEXT: it pl +; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}} +} + +; +; FIXME: add fcmp ule, olt here. +; + +; 38. VSELVS +define float @select_cc4(float %a.coerce) { +entry: + %0 = bitcast float %a.coerce to i32 + %tmp.0.extract.trunc = trunc i32 %0 to i16 + %1 = bitcast i16 %tmp.0.extract.trunc to half + + %2 = fcmp nsz ueq half %1, 0xH0001 + %3 = select i1 %2, half 0xHC000, half 0xH0002 + + %4 = bitcast half %3 to i16 + %tmp4.0.insert.ext = zext i16 %4 to i32 + %5 = bitcast i32 %tmp4.0.insert.ext to float + ret float %5 + ; CHECK-LABEL: select_cc4: -; CHECK-HARDFP-FULLFP16: vselvs.f16 s0, s{{.}}, s{{.}} + +; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} +; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}} +; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]] +; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]] +; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]] + +; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}} +; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]] +; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0 +; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}} +; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]] + +; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0 +; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}} +; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]] +; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0 +; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}} +; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr +; CHECK-SOFTFP-FP16-T32: it eq +; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-T32: it vs +; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]] +; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]] } ; 39. VSQRT - TODO