Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -328,25 +328,35 @@ setOperationAction(ISD::FREM, MVT::f16, Promote); setOperationAction(ISD::FREM, MVT::v4f16, Promote); + setOperationAction(ISD::FREM, MVT::v8f16, Promote); setOperationAction(ISD::FPOW, MVT::f16, Promote); setOperationAction(ISD::FPOW, MVT::v4f16, Promote); + setOperationAction(ISD::FPOW, MVT::v8f16, Promote); setOperationAction(ISD::FPOWI, MVT::f16, Promote); setOperationAction(ISD::FCOS, MVT::f16, Promote); setOperationAction(ISD::FCOS, MVT::v4f16, Promote); + setOperationAction(ISD::FCOS, MVT::v8f16, Promote); setOperationAction(ISD::FSIN, MVT::f16, Promote); setOperationAction(ISD::FSIN, MVT::v4f16, Promote); + setOperationAction(ISD::FSIN, MVT::v8f16, Promote); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote); setOperationAction(ISD::FEXP, MVT::f16, Promote); setOperationAction(ISD::FEXP, MVT::v4f16, Promote); + setOperationAction(ISD::FEXP, MVT::v8f16, Promote); setOperationAction(ISD::FEXP2, MVT::f16, Promote); setOperationAction(ISD::FEXP2, MVT::v4f16, Promote); + setOperationAction(ISD::FEXP2, MVT::v8f16, Promote); setOperationAction(ISD::FLOG, MVT::f16, Promote); setOperationAction(ISD::FLOG, MVT::v4f16, Promote); + setOperationAction(ISD::FLOG, MVT::v8f16, Promote); setOperationAction(ISD::FLOG2, MVT::f16, Promote); setOperationAction(ISD::FLOG2, MVT::v4f16, Promote); + setOperationAction(ISD::FLOG2, MVT::v8f16, Promote); setOperationAction(ISD::FLOG10, MVT::f16, Promote); setOperationAction(ISD::FLOG10, MVT::v4f16, Promote); + setOperationAction(ISD::FLOG10, MVT::v8f16, Promote); if (!Subtarget->hasFullFP16()) { setOperationAction(ISD::SELECT, MVT::f16, Promote); @@ -401,40 +411,28 @@ 
setOperationAction(ISD::FRINT, MVT::v4f16, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); - } - - // v8f16 is also a storage-only type, so expand it. - setOperationAction(ISD::FABS, MVT::v8f16, Expand); - setOperationAction(ISD::FADD, MVT::v8f16, Expand); - setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); - setOperationAction(ISD::FCOS, MVT::v8f16, Expand); - setOperationAction(ISD::FDIV, MVT::v8f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); - setOperationAction(ISD::FMA, MVT::v8f16, Expand); - setOperationAction(ISD::FMUL, MVT::v8f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); - setOperationAction(ISD::FNEG, MVT::v8f16, Expand); - setOperationAction(ISD::FPOW, MVT::v8f16, Expand); - setOperationAction(ISD::FREM, MVT::v8f16, Expand); - setOperationAction(ISD::FROUND, MVT::v8f16, Expand); - setOperationAction(ISD::FRINT, MVT::v8f16, Expand); - setOperationAction(ISD::FSIN, MVT::v8f16, Expand); - setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); - setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); - setOperationAction(ISD::FSUB, MVT::v8f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); - setOperationAction(ISD::SETCC, MVT::v8f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); - setOperationAction(ISD::SELECT, MVT::v8f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); - setOperationAction(ISD::FEXP, MVT::v8f16, Expand); - setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); - setOperationAction(ISD::FLOG, MVT::v8f16, Expand); - setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); - setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); + setOperationAction(ISD::FABS, MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v8f16, Expand); + setOperationAction(ISD::FCEIL, 
MVT::v8f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); + setOperationAction(ISD::FDIV, MVT::v8f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); + setOperationAction(ISD::FMA, MVT::v8f16, Expand); + setOperationAction(ISD::FMUL, MVT::v8f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); + setOperationAction(ISD::FNEG, MVT::v8f16, Expand); + setOperationAction(ISD::FROUND, MVT::v8f16, Expand); + setOperationAction(ISD::FRINT, MVT::v8f16, Expand); + setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); + setOperationAction(ISD::FSUB, MVT::v8f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); + setOperationAction(ISD::SETCC, MVT::v8f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); + setOperationAction(ISD::SELECT, MVT::v8f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); + } // AArch64 has implementations of a lot of rounding-like FP operations. for (MVT Ty : {MVT::f32, MVT::f64}) { Index: test/CodeGen/AArch64/arm64-vfloatintrinsics.ll =================================================================== --- test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -5,6 +5,7 @@ %v2f32 = type <2 x float> %v4f16 = type <4 x half> +%v8f16 = type <8 x half> ; CHECK-LABEL: test_v2f32.sqrt: define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { @@ -20,11 +21,22 @@ ; CHECK: fsqrt s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.sqrt: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fsqrt.4h ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.sqrt(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 fsqrt tests, so skipped for -fullfp16. 
+ +; CHECK-FP16-LABEL: test_v8f16.sqrt: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fsqrt.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a) + ret %v8f16 %1 +} ; CHECK: test_v2f32.powi: define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) { ; CHECK: pow @@ -93,10 +105,20 @@ ; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.fma: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fmla.4h %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c) ret %v4f16 %1 } +define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) { +; Filechecks are unwieldy with 16 fcvt and 8 fma tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_v8f16.fma: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fmla.8h + %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c) + ret %v8f16 %1 +} ; CHECK-LABEL: test_v2f32.fabs: define %v2f32 @test_v2f32.fabs(%v2f32 %a) { ; CHECK: fabs.2s @@ -111,11 +133,22 @@ ; CHECK: fabs s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.fabs: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fabs.4h ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.fabs(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 fabs tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_v8f16.fabs: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fabs.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a) + ret %v8f16 %1 +} ; CHECK-LABEL: test_v2f32.floor: define %v2f32 @test_v2f32.floor(%v2f32 %a) { ; CHECK: frintm.2s @@ -130,11 +163,22 @@ ; CHECK: frintm s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.floor: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frintm.4h ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.floor(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 frintm tests, so skipped for -fullfp16. 
+ +; CHECK-FP16-LABEL: test_v8f16.floor: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: frintm.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a) + ret %v8f16 %1 +} ; CHECK-LABEL: test_v2f32.ceil: define %v2f32 @test_v2f32.ceil(%v2f32 %a) { ; CHECK: frintp.2s @@ -149,11 +193,22 @@ ; CHECK: frintp s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.ceil: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frintp.4h ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.ceil(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 frintp tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_v8f16.ceil: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: frintp.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a) + ret %v8f16 %1 +} ; CHECK-LABEL: test_v2f32.trunc: define %v2f32 @test_v2f32.trunc(%v2f32 %a) { ; CHECK: frintz.2s @@ -173,6 +228,16 @@ %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.trunc(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 frintz tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_v8f16.trunc: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: frintz.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a) + ret %v8f16 %1 +} ; CHECK-LABEL: test_v2f32.rint: define %v2f32 @test_v2f32.rint(%v2f32 %a) { ; CHECK: frintx.2s @@ -187,11 +252,21 @@ ; CHECK: frintx s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.rint: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frintx.4h ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.rint(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 frintx tests, so skipped for -fullfp16. 
+ +; CHECK-FP16-LABEL: test_v8f16.rint: +; CHECK-FP16: frintx.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a) + ret %v8f16 %1 +} ; CHECK-LABEL: test_v2f32.nearbyint: define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) { ; CHECK: frinti.2s @@ -206,14 +281,26 @@ ; CHECK: frinti s{{.}}, s{{.}} ; CHECK-FP16-LABEL: test_v4f16.nearbyint: +; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frinti.4h ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a) ret %v4f16 %1 } +define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) { +; Filechecks are unwieldy with 16 fcvt and 8 frinti tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_v8f16.nearbyint: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: frinti.8h +; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a) + ret %v8f16 %1 +} declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0 +declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0 declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 @@ -227,24 +314,31 @@ declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0 declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0 +declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0 declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0 declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0 +declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0 declare %v2f32 @llvm.floor.v2f32(%v2f32) #0 declare %v4f16 @llvm.floor.v4f16(%v4f16) #0 +declare %v8f16 @llvm.floor.v8f16(%v8f16) #0 declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0 declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0 +declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0 declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0 declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0 +declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0 declare %v2f32 @llvm.rint.v2f32(%v2f32) #0 declare %v4f16 @llvm.rint.v4f16(%v4f16) #0 +declare %v8f16 @llvm.rint.v8f16(%v8f16) #0 declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0 declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0 +declare 
%v8f16 @llvm.nearbyint.v8f16(%v8f16) #0 ;;; Index: test/CodeGen/AArch64/fp16-v8-instructions.ll =================================================================== --- test/CodeGen/AArch64/fp16-v8-instructions.ll +++ test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -1,40 +1,46 @@ -; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16 --check-prefix=CHECK define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) { entry: -; CHECK-LABEL: add_h: -; CHECK: fcvt -; CHECK: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fadd -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK: fcvt +; CHECK-CVT-LABEL: add_h: +; CHECK-CVT: fcvt +; CHECK-CVT: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fadd +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; 
CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT: fcvt + +; CHECK-FP16-LABEL: add_h: +; CHECK-FP16: fadd v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: ret + %0 = fadd <8 x half> %a, %b ret <8 x half> %0 } @@ -42,39 +48,44 @@ define <8 x half> @sub_h(<8 x half> %a, <8 x half> %b) { entry: -; CHECK-LABEL: sub_h: -; CHECK: fcvt -; CHECK: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fsub -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK: fcvt +; CHECK-CVT-LABEL: sub_h: +; CHECK-CVT: fcvt +; CHECK-CVT: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fsub +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT: fcvt + +; CHECK-FP16-LABEL: sub_h: +; CHECK-FP16: fsub v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: ret + %0 = fsub <8 x half> %a, %b ret <8 x half> %0 } @@ -82,39 +93,44 @@ define <8 x half> @mul_h(<8 x half> %a, <8 x half> %b) { entry: -; CHECK-LABEL: mul_h: -; CHECK: fcvt -; CHECK: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: 
fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fmul -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK: fcvt +; CHECK-CVT-LABEL: mul_h: +; CHECK-CVT: fcvt +; CHECK-CVT: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fmul +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT: fcvt + +; CHECK-FP16-LABEL: mul_h: +; CHECK-FP16: fmul v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: ret + %0 = fmul <8 x half> %a, %b ret <8 x half> %0 } @@ -122,39 +138,44 @@ define <8 x half> @div_h(<8 x half> %a, <8 x half> %b) { entry: -; CHECK-LABEL: div_h: -; CHECK: fcvt -; CHECK: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fdiv -; CHECK-DAG: 
fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK: fcvt +; CHECK-CVT-LABEL: div_h: +; CHECK-CVT: fcvt +; CHECK-CVT: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fdiv +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT: fcvt + +; CHECK-FP16-LABEL: div_h: +; CHECK-FP16: fdiv v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: ret + %0 = fdiv <8 x half> %a, %b ret <8 x half> %0 } @@ -421,86 +442,254 @@ ret <8 x i16> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_une: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp une <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 16 csel tests. Skipped. 
define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_ueq: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ueq <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_ugt: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ugt <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. 
+ +; CHECK-FP16-LABEL: test_fcmp_uge: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp uge <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_ult: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ult <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_ule: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ule <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. 
Skipped. define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_uno: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp uno <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_one: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp one <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. 
+ +; CHECK-FP16-LABEL: test_fcmp_oeq: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp oeq <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_ogt: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ogt <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_oge: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp oge <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. 
Skipped. define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_olt: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp olt <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. + +; CHECK-FP16-LABEL: test_fcmp_ole: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ole <8 x half> %a, %b ret <8 x i1> %1 } -; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped. define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 { +; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests, so skipped for -fullfp16. 
+ +; CHECK-FP16-LABEL: test_fcmp_ord: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} +; CHECK-FP16-DAG: fcmp h{{[0-9]}}, h{{[0-9]}} + %1 = fcmp ord <8 x half> %a, %b ret <8 x i1> %1 }