Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -333,36 +333,38 @@ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::v4f16, Promote); - setOperationAction(ISD::FREM, MVT::v8f16, Promote); + setOperationAction(ISD::FREM, MVT::v4f16, Expand); + setOperationAction(ISD::FREM, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::v4f16, Promote); - setOperationAction(ISD::FPOW, MVT::v8f16, Promote); + setOperationAction(ISD::FPOW, MVT::v4f16, Expand); + setOperationAction(ISD::FPOW, MVT::v8f16, Expand); setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); + setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::v4f16, Promote); - setOperationAction(ISD::FCOS, MVT::v8f16, Promote); + setOperationAction(ISD::FCOS, MVT::v4f16, Expand); + setOperationAction(ISD::FCOS, MVT::v8f16, Expand); setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::v4f16, Promote); - setOperationAction(ISD::FSIN, MVT::v8f16, Promote); + setOperationAction(ISD::FSIN, MVT::v4f16, Expand); + setOperationAction(ISD::FSIN, MVT::v8f16, Expand); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); + setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::v4f16, Promote); - setOperationAction(ISD::FEXP, MVT::v8f16, Promote); + setOperationAction(ISD::FEXP, MVT::v4f16, Expand); + setOperationAction(ISD::FEXP, MVT::v8f16, Expand); setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::v4f16, Promote); - setOperationAction(ISD::FEXP2, MVT::v8f16, Promote); + setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); + setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::v4f16, Promote); - setOperationAction(ISD::FLOG, MVT::v8f16, Promote); + setOperationAction(ISD::FLOG, MVT::v4f16, Expand); + setOperationAction(ISD::FLOG, MVT::v8f16, Expand); setOperationAction(ISD::FLOG2, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::v4f16, Promote); - setOperationAction(ISD::FLOG2, MVT::v8f16, Promote); + setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); + setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); setOperationAction(ISD::FLOG10, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::v4f16, Promote); - setOperationAction(ISD::FLOG10, MVT::v8f16, Promote); + setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); + setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); if (!Subtarget->hasFullFP16()) { setOperationAction(ISD::SELECT, MVT::f16, Promote); Index: test/CodeGen/AArch64/arm64-vfloatintrinsics.ll =================================================================== --- test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -1,42 +1,332 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16 +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP16 +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16 -;;; Float vectors +;;; Half vectors -%v2f32 = type <2 x float> %v4f16 = type <4 x half> -%v8f16 = type <8 x half> -; CHECK-LABEL: test_v2f32.sqrt: -define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { - ; CHECK: fsqrt.2s - %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a) - ret %v2f32 %1 -} define %v4f16 @test_v4f16.sqrt(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.sqrt: -; CHECK: fsqrt s{{.}}, s{{.}} -; CHECK: fsqrt s{{.}}, s{{.}} -; CHECK: fsqrt s{{.}}, s{{.}} -; CHECK: fsqrt s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.sqrt: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: fsqrt.4h -; CHECK-FP16-NEXT: ret + ; CHECK-LABEL: test_v4f16.sqrt: + ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: fsqrt.4h + ; CHECK-FP16-NEXT: ret %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a) ret %v4f16 %1 } -define %v8f16 @test_v8f16.sqrt(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 fsqrt tests, so skipped for -fullfp16. +define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.powi: + ; CHECK-COUNT-4: bl __powi + %1 = call %v4f16 @llvm.powi.v4f16(%v4f16 %a, i32 %b) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.sin(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.sin: + ; CHECK-COUNT-4: bl sinf + %1 = call %v4f16 @llvm.sin.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.cos(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.cos: + ; CHECK-COUNT-4: bl cosf + %1 = call %v4f16 @llvm.cos.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.pow(%v4f16 %a, %v4f16 %b) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.pow: + ; CHECK-COUNT-4: bl pow + %1 = call %v4f16 @llvm.pow.v4f16(%v4f16 %a, %v4f16 %b) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.exp(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.exp: + ; CHECK-COUNT-4: bl exp + %1 = call %v4f16 @llvm.exp.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.exp2(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.exp2: + ; CHECK-COUNT-4: bl exp2 + %1 = call %v4f16 @llvm.exp2.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.log(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.log: + ; CHECK-COUNT-4: bl log + %1 = call %v4f16 @llvm.log.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.log10(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.log10: + ; CHECK-COUNT-4: bl log10 + %1 = call %v4f16 @llvm.log10.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.log2(%v4f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v4f16.log2: + ; CHECK-COUNT-4: bl log2 + %1 = call %v4f16 @llvm.log2.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) { + ; CHECK-LABEL: test_v4f16.fma: + ; CHECK-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: fmla.4h + %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.fabs(%v4f16 %a) { + ; CHECK-LABEL: test_v4f16.fabs: + ; CHECK-NOFP16-COUNT-4: fabs s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: fabs.4h + ; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.floor(%v4f16 %a) { + ; CHECK-LABEL: test_v4f16.floor: + ; CHECK-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintm.4h + ; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.ceil(%v4f16 %a) { + ; CHECK-LABEL: test_v4f16.ceil: + ; CHECK-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintp.4h + ; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.trunc(%v4f16 %a) { + ; CHECK-LABEL: test_v4f16.trunc: + ; CHECK-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintz.4h + ; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.rint(%v4f16 %a) { + ; CHECK-LABEL: test_v4f16.rint: + ; CHECK-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintx.4h + ; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) { + ; CHECK-LABEL: test_v4f16.nearbyint: + ; CHECK-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frinti.4h + ; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a) + ret %v4f16 %1 +} -; CHECK-FP16-LABEL: test_v8f16.sqrt: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: fsqrt.8h -; CHECK-FP16-NEXT: ret +declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0 +declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0 +declare %v4f16 @llvm.sin.v4f16(%v4f16) #0 +declare %v4f16 @llvm.cos.v4f16(%v4f16) #0 +declare %v4f16 @llvm.pow.v4f16(%v4f16, %v4f16) #0 +declare %v4f16 @llvm.exp.v4f16(%v4f16) #0 +declare %v4f16 @llvm.exp2.v4f16(%v4f16) #0 +declare %v4f16 @llvm.log.v4f16(%v4f16) #0 +declare %v4f16 @llvm.log10.v4f16(%v4f16) #0 +declare %v4f16 @llvm.log2.v4f16(%v4f16) #0 +declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0 +declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0 +declare %v4f16 @llvm.floor.v4f16(%v4f16) #0 +declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0 +declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0 +declare %v4f16 @llvm.rint.v4f16(%v4f16) #0 +declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0 + +;;; + +%v8f16 = type <8 x half> + +define %v8f16 @test_v8f16.sqrt(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.sqrt: + ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: fsqrt.8h + ; CHECK-FP16-NEXT: ret %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a) ret %v8f16 %1 } +define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.powi: + ; CHECK-COUNT-8: bl __powi + %1 = call %v8f16 @llvm.powi.v8f16(%v8f16 %a, i32 %b) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.sin(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.sin: + ; CHECK-COUNT-8: bl sinf + %1 = call %v8f16 @llvm.sin.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.cos(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.cos: + ; CHECK-COUNT-8: bl cosf + %1 = call %v8f16 @llvm.cos.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.pow(%v8f16 %a, %v8f16 %b) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.pow: + ; CHECK-COUNT-8: bl pow + %1 = call %v8f16 @llvm.pow.v8f16(%v8f16 %a, %v8f16 %b) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.exp(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.exp: + ; CHECK-COUNT-8: bl exp + %1 = call %v8f16 @llvm.exp.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.exp2(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.exp2: + ; CHECK-COUNT-8: bl exp2 + %1 = call %v8f16 @llvm.exp2.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.log(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.log: + ; CHECK-COUNT-8: bl log + %1 = call %v8f16 @llvm.log.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.log10(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.log10: + ; CHECK-COUNT-8: bl log10 + %1 = call %v8f16 @llvm.log10.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.log2(%v8f16 %a) { + ; This operation is expanded, whether with or without +fullfp16. + ; CHECK-LABEL: test_v8f16.log2: + ; CHECK-COUNT-8: bl log2 + %1 = call %v8f16 @llvm.log2.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) { + ; CHECK-LABEL: test_v8f16.fma: + ; CHECK-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: fmla.8h + %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.fabs(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.fabs: + ; CHECK-NOFP16-COUNT-8: fabs s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: fabs.8h + ; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.floor(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.floor: + ; CHECK-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintm.8h + ; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.ceil(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.ceil: + ; CHECK-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintp.8h + ; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.trunc(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.trunc: + ; CHECK-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintz.8h + ; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.rint(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.rint: + ; CHECK-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frintx.8h + ; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a) + ret %v8f16 %1 +} +define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) { + ; CHECK-LABEL: test_v8f16.nearbyint: + ; CHECK-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}} + ; CHECK-FP16-NOT: fcvt + ; CHECK-FP16: frinti.8h + ; CHECK-FP16-NEXT: ret + %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a) + ret %v8f16 %1 +} + +declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0 +declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0 +declare %v8f16 @llvm.sin.v8f16(%v8f16) #0 +declare %v8f16 @llvm.cos.v8f16(%v8f16) #0 +declare %v8f16 @llvm.pow.v8f16(%v8f16, %v8f16) #0 +declare %v8f16 @llvm.exp.v8f16(%v8f16) #0 +declare %v8f16 @llvm.exp2.v8f16(%v8f16) #0 +declare %v8f16 @llvm.log.v8f16(%v8f16) #0 +declare %v8f16 @llvm.log10.v8f16(%v8f16) #0 +declare %v8f16 @llvm.log2.v8f16(%v8f16) #0 +declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0 +declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0 +declare %v8f16 @llvm.floor.v8f16(%v8f16) #0 +declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0 +declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0 +declare %v8f16 @llvm.rint.v8f16(%v8f16) #0 +declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0 + +;;; Float vectors + +%v2f32 = type <2 x float> + +; CHECK-LABEL: test_v2f32.sqrt: +define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { + ; CHECK: fsqrt.2s + %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a) + ret %v2f32 %1 +} ; CHECK: test_v2f32.powi: define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) { ; CHECK: pow @@ -97,211 +387,44 @@ %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c) ret %v2f32 %1 } -define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) { -; CHECK-LABEL: test_v4f16.fma: -; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} -; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} -; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} -; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.fma: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: fmla.4h - %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) { -; Filechecks are unwieldy with 16 fcvt and 8 fma tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.fma: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: fmla.8h - %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c) - ret %v8f16 %1 -} ; CHECK-LABEL: test_v2f32.fabs: define %v2f32 @test_v2f32.fabs(%v2f32 %a) { ; CHECK: fabs.2s %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a) ret %v2f32 %1 } -define %v4f16 @test_v4f16.fabs(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.fabs: -; CHECK: fabs s{{.}}, s{{.}} -; CHECK: fabs s{{.}}, s{{.}} -; CHECK: fabs s{{.}}, s{{.}} -; CHECK: fabs s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.fabs: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: fabs.4h -; CHECK-FP16-NEXT: ret - %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.fabs(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 fabs tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.fabs: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: fabs.8h -; CHECK-FP16-NEXT: ret - %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a) - ret %v8f16 %1 -} ; CHECK-LABEL: test_v2f32.floor: define %v2f32 @test_v2f32.floor(%v2f32 %a) { ; CHECK: frintm.2s %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a) ret %v2f32 %1 } -define %v4f16 @test_v4f16.floor(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.floor: -; CHECK: frintm s{{.}}, s{{.}} -; CHECK: frintm s{{.}}, s{{.}} -; CHECK: frintm s{{.}}, s{{.}} -; CHECK: frintm s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.floor: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frintm.4h -; CHECK-FP16-NEXT: ret - %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.floor(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 frintm tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.floor: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frintm.8h -; CHECK-FP16-NEXT: ret - %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a) - ret %v8f16 %1 -} ; CHECK-LABEL: test_v2f32.ceil: define %v2f32 @test_v2f32.ceil(%v2f32 %a) { ; CHECK: frintp.2s %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a) ret %v2f32 %1 } -define %v4f16 @test_v4f16.ceil(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.ceil: -; CHECK: frintp s{{.}}, s{{.}} -; CHECK: frintp s{{.}}, s{{.}} -; CHECK: frintp s{{.}}, s{{.}} -; CHECK: frintp s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.ceil: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frintp.4h -; CHECK-FP16-NEXT: ret - %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.ceil(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.ceil: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frintp.8h -; CHECK-FP16-NEXT: ret - %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a) - ret %v8f16 %1 -} ; CHECK-LABEL: test_v2f32.trunc: define %v2f32 @test_v2f32.trunc(%v2f32 %a) { ; CHECK: frintz.2s %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a) ret %v2f32 %1 } -define %v4f16 @test_v4f16.trunc(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.trunc: -; CHECK: frintz s{{.}}, s{{.}} -; CHECK: frintz s{{.}}, s{{.}} -; CHECK: frintz s{{.}}, s{{.}} -; CHECK: frintz s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.trunc: -; CHECK-FP16: frintz.4h -; CHECK-FP16-NEXT: ret - %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.trunc(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.trunc: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frintz.8h -; CHECK-FP16-NEXT: ret - %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a) - ret %v8f16 %1 -} ; CHECK-LABEL: test_v2f32.rint: define %v2f32 @test_v2f32.rint(%v2f32 %a) { ; CHECK: frintx.2s %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a) ret %v2f32 %1 } -define %v4f16 @test_v4f16.rint(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.rint: -; CHECK: frintx s{{.}}, s{{.}} -; CHECK: frintx s{{.}}, s{{.}} -; CHECK: frintx s{{.}}, s{{.}} -; CHECK: frintx s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.rint: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frintx.4h -; CHECK-FP16-NEXT: ret - %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.rint(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.rint: -; CHECK-FP16: frintx.8h -; CHECK-FP16-NEXT: ret - %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a) - ret %v8f16 %1 -} ; CHECK-LABEL: test_v2f32.nearbyint: define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) { ; CHECK: frinti.2s %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a) ret %v2f32 %1 } -define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) { -; CHECK-LABEL: test_v4f16.nearbyint: -; CHECK: frinti s{{.}}, s{{.}} -; CHECK: frinti s{{.}}, s{{.}} -; CHECK: frinti s{{.}}, s{{.}} -; CHECK: frinti s{{.}}, s{{.}} - -; CHECK-FP16-LABEL: test_v4f16.nearbyint: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frinti.4h -; CHECK-FP16-NEXT: ret - %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a) - ret %v4f16 %1 -} -define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) { -; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16. - -; CHECK-FP16-LABEL: test_v8f16.nearbyint: -; CHECK-FP16-NOT: fcvt -; CHECK-FP16: frinti.8h -; CHECK-FP16-NEXT: ret - %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a) - ret %v8f16 %1 -} declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 -declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0 -declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0 - declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 @@ -311,38 +434,18 @@ declare %v2f32 @llvm.log.v2f32(%v2f32) #0 declare %v2f32 @llvm.log10.v2f32(%v2f32) #0 declare %v2f32 @llvm.log2.v2f32(%v2f32) #0 - declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0 -declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0 -declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0 - declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0 -declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0 -declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0 - declare %v2f32 @llvm.floor.v2f32(%v2f32) #0 -declare %v4f16 @llvm.floor.v4f16(%v4f16) #0 -declare %v8f16 @llvm.floor.v8f16(%v8f16) #0 - declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0 -declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0 -declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0 - declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0 -declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0 -declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0 - declare %v2f32 @llvm.rint.v2f32(%v2f32) #0 -declare %v4f16 @llvm.rint.v4f16(%v4f16) #0 -declare %v8f16 @llvm.rint.v8f16(%v8f16) #0 - declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0 -declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0 -declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0 ;;; %v4f32 = type <4 x float> + ; CHECK: test_v4f32.sqrt: define %v4f32 @test_v4f32.sqrt(%v4f32 %a) { ; CHECK: fsqrt.4s