Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -326,17 +326,27 @@ else setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FPOWI, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::v4f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::v4f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::v4f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::v4f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::v4f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::v4f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::v4f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::v4f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + 
setOperationAction(ISD::FLOG10, MVT::v4f16, Promote); if (!Subtarget->hasFullFP16()) { setOperationAction(ISD::SELECT, MVT::f16, Promote); @@ -361,53 +371,39 @@ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); setOperationAction(ISD::FMINNAN, MVT::f16, Promote); setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); - } - - // v4f16 is also a storage-only type, so promote it to v4f32 when that is - // known to be safe. - setOperationAction(ISD::FADD, MVT::v4f16, Promote); - setOperationAction(ISD::FSUB, MVT::v4f16, Promote); - setOperationAction(ISD::FMUL, MVT::v4f16, Promote); - setOperationAction(ISD::FDIV, MVT::v4f16, Promote); - setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote); - setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote); - AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32); - AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32); - - // Expand all other v4f16 operations. 
- // FIXME: We could generate better code by promoting some operations to - // a pair of v4f32s - setOperationAction(ISD::FABS, MVT::v4f16, Expand); - setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); - setOperationAction(ISD::FCOS, MVT::v4f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); - setOperationAction(ISD::FMA, MVT::v4f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); - setOperationAction(ISD::FNEG, MVT::v4f16, Expand); - setOperationAction(ISD::FPOW, MVT::v4f16, Expand); - setOperationAction(ISD::FREM, MVT::v4f16, Expand); - setOperationAction(ISD::FROUND, MVT::v4f16, Expand); - setOperationAction(ISD::FRINT, MVT::v4f16, Expand); - setOperationAction(ISD::FSIN, MVT::v4f16, Expand); - setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); - setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); - setOperationAction(ISD::SETCC, MVT::v4f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); - setOperationAction(ISD::SELECT, MVT::v4f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); - setOperationAction(ISD::FEXP, MVT::v4f16, Expand); - setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); - setOperationAction(ISD::FLOG, MVT::v4f16, Expand); - setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); - setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); + // Promote v4f16 to v4f32 when that is known to be safe.
+ setOperationAction(ISD::FADD, MVT::v4f16, Promote); + setOperationAction(ISD::FSUB, MVT::v4f16, Promote); + setOperationAction(ISD::FMUL, MVT::v4f16, Promote); + setOperationAction(ISD::FDIV, MVT::v4f16, Promote); + setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote); + setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote); + AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32); + AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32); + + setOperationAction(ISD::FABS, MVT::v4f16, Expand); + setOperationAction(ISD::FNEG, MVT::v4f16, Expand); + setOperationAction(ISD::FROUND, MVT::v4f16, Expand); + setOperationAction(ISD::FMA, MVT::v4f16, Expand); + setOperationAction(ISD::SETCC, MVT::v4f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); + setOperationAction(ISD::SELECT, MVT::v4f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); + setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); + setOperationAction(ISD::FRINT, MVT::v4f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); + } + // v8f16 is also a storage-only type, so expand it. 
setOperationAction(ISD::FABS, MVT::v8f16, Expand); setOperationAction(ISD::FADD, MVT::v8f16, Expand); Index: llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -1,14 +1,30 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16 ;;; Float vectors %v2f32 = type <2 x float> -; CHECK: test_v2f32.sqrt: +%v4f16 = type <4 x half> + +; CHECK-LABEL: test_v2f32.sqrt: define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { ; CHECK: fsqrt.2s %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a) ret %v2f32 %1 } +define %v4f16 @test_v4f16.sqrt(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.sqrt: +; CHECK: fsqrt s{{.}}, s{{.}} +; CHECK: fsqrt s{{.}}, s{{.}} +; CHECK: fsqrt s{{.}}, s{{.}} +; CHECK: fsqrt s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.sqrt: +; CHECK-FP16: fsqrt.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a) + ret %v4f16 %1 +} ; CHECK: test_v2f32.powi: define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) { ; CHECK: pow @@ -63,50 +79,142 @@ %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a) ret %v2f32 %1 } -; CHECK: test_v2f32.fma: +; CHECK-LABEL: test_v2f32.fma: define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) { - ; CHECK: fma + ; CHECK: fmla.2s %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c) ret %v2f32 %1 } -; CHECK: test_v2f32.fabs: +define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) { +; CHECK-LABEL: test_v4f16.fma: +; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} +; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} +; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}} +; CHECK: fmadd s{{.}}, s{{.}}, 
s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.fma: +; CHECK-FP16: fmla.4h + %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c) + ret %v4f16 %1 +} +; CHECK-LABEL: test_v2f32.fabs: define %v2f32 @test_v2f32.fabs(%v2f32 %a) { - ; CHECK: fabs + ; CHECK: fabs.2s %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a) ret %v2f32 %1 } -; CHECK: test_v2f32.floor: +define %v4f16 @test_v4f16.fabs(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.fabs: +; CHECK: fabs s{{.}}, s{{.}} +; CHECK: fabs s{{.}}, s{{.}} +; CHECK: fabs s{{.}}, s{{.}} +; CHECK: fabs s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.fabs: +; CHECK-FP16: fabs.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +; CHECK-LABEL: test_v2f32.floor: define %v2f32 @test_v2f32.floor(%v2f32 %a) { ; CHECK: frintm.2s %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a) ret %v2f32 %1 } -; CHECK: test_v2f32.ceil: +define %v4f16 @test_v4f16.floor(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.floor: +; CHECK: frintm s{{.}}, s{{.}} +; CHECK: frintm s{{.}}, s{{.}} +; CHECK: frintm s{{.}}, s{{.}} +; CHECK: frintm s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.floor: +; CHECK-FP16: frintm.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +; CHECK-LABEL: test_v2f32.ceil: define %v2f32 @test_v2f32.ceil(%v2f32 %a) { ; CHECK: frintp.2s %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a) ret %v2f32 %1 } -; CHECK: test_v2f32.trunc: +define %v4f16 @test_v4f16.ceil(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.ceil: +; CHECK: frintp s{{.}}, s{{.}} +; CHECK: frintp s{{.}}, s{{.}} +; CHECK: frintp s{{.}}, s{{.}} +; CHECK: frintp s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.ceil: +; CHECK-FP16: frintp.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +; CHECK-LABEL: test_v2f32.trunc: define %v2f32 @test_v2f32.trunc(%v2f32 %a) { ; CHECK: frintz.2s %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a) ret %v2f32 %1 } -; CHECK: 
test_v2f32.rint: +define %v4f16 @test_v4f16.trunc(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.trunc: +; CHECK: frintz s{{.}}, s{{.}} +; CHECK: frintz s{{.}}, s{{.}} +; CHECK: frintz s{{.}}, s{{.}} +; CHECK: frintz s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.trunc: +; CHECK-FP16: frintz.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +; CHECK-LABEL: test_v2f32.rint: define %v2f32 @test_v2f32.rint(%v2f32 %a) { ; CHECK: frintx.2s %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a) ret %v2f32 %1 } -; CHECK: test_v2f32.nearbyint: +define %v4f16 @test_v4f16.rint(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.rint: +; CHECK: frintx s{{.}}, s{{.}} +; CHECK: frintx s{{.}}, s{{.}} +; CHECK: frintx s{{.}}, s{{.}} +; CHECK: frintx s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.rint: +; CHECK-FP16: frintx.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a) + ret %v4f16 %1 +} +; CHECK-LABEL: test_v2f32.nearbyint: define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) { ; CHECK: frinti.2s %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a) ret %v2f32 %1 } +define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) { +; CHECK-LABEL: test_v4f16.nearbyint: +; CHECK: frinti s{{.}}, s{{.}} +; CHECK: frinti s{{.}}, s{{.}} +; CHECK: frinti s{{.}}, s{{.}} +; CHECK: frinti s{{.}}, s{{.}} + +; CHECK-FP16-LABEL: test_v4f16.nearbyint: +; CHECK-FP16: frinti.4h +; CHECK-FP16-NEXT: ret + %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a) + ret %v4f16 %1 +} declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 +declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0 + declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 @@ -116,13 +224,27 @@ declare %v2f32 @llvm.log.v2f32(%v2f32) #0 declare %v2f32 @llvm.log10.v2f32(%v2f32) #0 declare %v2f32 @llvm.log2.v2f32(%v2f32) #0 + declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0 +declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0 + declare %v2f32 
@llvm.fabs.v2f32(%v2f32) #0 +declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0 + declare %v2f32 @llvm.floor.v2f32(%v2f32) #0 +declare %v4f16 @llvm.floor.v4f16(%v4f16) #0 + declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0 +declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0 + declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0 +declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0 + declare %v2f32 @llvm.rint.v2f32(%v2f32) #0 +declare %v4f16 @llvm.rint.v4f16(%v4f16) #0 + declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0 +declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0 ;;; Index: llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -1,12 +1,17 @@ -; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK-COMMON +; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16 --check-prefix=CHECK-COMMON define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) { entry: -; CHECK-LABEL: add_h: -; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h -; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h -; CHECK: fadd [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] -; CHECK: fcvtn v0.4h, [[RES]] +; CHECK-CVT-LABEL: add_h: +; CHECK-CVT-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h +; CHECK-CVT-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h +; CHECK-CVT-NEXT: fadd [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[RES]] + +; CHECK-FP16-LABEL: add_h: +; CHECK-FP16: fadd v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %0 = fadd <4 x half> %a, %b ret <4 x half> %0 } @@ -14,20 +19,24 @@ define <4 x half> @build_h4(<4 x half> %a) { entry: -; CHECK-LABEL: build_h4: -; CHECK: mov [[GPR:w[0-9]+]], #15565 -; CHECK: dup v0.4h, [[GPR]] +; 
CHECK-COMMON-LABEL: build_h4: +; CHECK-COMMON: mov [[GPR:w[0-9]+]], #15565 +; CHECK-COMMON-NEXT: dup v0.4h, [[GPR]] ret <4 x half> <half 0xH3CCD, half 0xH3CCD, half 0xH3CCD, half 0xH3CCD> } define <4 x half> @sub_h(<4 x half> %a, <4 x half> %b) { entry: -; CHECK-LABEL: sub_h: -; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h -; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h -; CHECK: fsub [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] -; CHECK: fcvtn v0.4h, [[RES]] +; CHECK-CVT-LABEL: sub_h: +; CHECK-CVT-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h +; CHECK-CVT-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h +; CHECK-CVT-NEXT: fsub [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[RES]] + +; CHECK-FP16-LABEL: sub_h: +; CHECK-FP16: fsub v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %0 = fsub <4 x half> %a, %b ret <4 x half> %0 } @@ -35,11 +44,15 @@ define <4 x half> @mul_h(<4 x half> %a, <4 x half> %b) { entry: -; CHECK-LABEL: mul_h: -; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h -; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h -; CHECK: fmul [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] -; CHECK: fcvtn v0.4h, [[RES]] +; CHECK-CVT-LABEL: mul_h: +; CHECK-CVT-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h +; CHECK-CVT-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h +; CHECK-CVT-NEXT: fmul [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[RES]] + +; CHECK-FP16-LABEL: mul_h: +; CHECK-FP16: fmul v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %0 = fmul <4 x half> %a, %b ret <4 x half> %0 } @@ -47,11 +60,15 @@ define <4 x half> @div_h(<4 x half> %a, <4 x half> %b) { entry: -; CHECK-LABEL: div_h: -; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h -; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h -; CHECK: fdiv [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] -; CHECK: fcvtn v0.4h, [[RES]] +; CHECK-CVT-LABEL: div_h: +; CHECK-CVT-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h +; CHECK-CVT-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h +; CHECK-CVT-NEXT: fdiv [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]] +; CHECK-CVT-NEXT: fcvtn v0.4h, [[RES]] + +; CHECK-FP16-LABEL: div_h: +; CHECK-FP16: fdiv v0.4h,
v0.4h, v1.4h +; CHECK-FP16-NEXT: ret %0 = fdiv <4 x half> %a, %b ret <4 x half> %0 } @@ -59,8 +76,9 @@ define <4 x half> @load_h(<4 x half>* %a) { entry: -; CHECK-LABEL: load_h: -; CHECK: ldr d0, [x0] +; CHECK-COMMON-LABEL: load_h: +; CHECK-COMMON: ldr d0, [x0] +; CHECK-COMMON-NEXT: ret %0 = load <4 x half>, <4 x half>* %a, align 4 ret <4 x half> %0 } @@ -68,476 +86,643 @@ define void @store_h(<4 x half>* %a, <4 x half> %b) { entry: -; CHECK-LABEL: store_h: -; CHECK: str d0, [x0] +; CHECK-COMMON-LABEL: store_h: +; CHECK-COMMON: str d0, [x0] +; CHECK-COMMON-NEXT: ret store <4 x half> %b, <4 x half>* %a, align 4 ret void } define <4 x half> @s_to_h(<4 x float> %a) { -; CHECK-LABEL: s_to_h: -; CHECK: fcvtn v0.4h, v0.4s +; CHECK-COMMON-LABEL: s_to_h: +; CHECK-COMMON: fcvtn v0.4h, v0.4s +; CHECK-COMMON-NEXT: ret %1 = fptrunc <4 x float> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @d_to_h(<4 x double> %a) { -; CHECK-LABEL: d_to_h: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: ins -; CHECK-DAG: ins -; CHECK-DAG: ins -; CHECK-DAG: ins +; CHECK-COMMON-LABEL: d_to_h: +; CHECK-COMMON: mov +; CHECK-COMMON: fcvt +; CHECK-COMMON: fcvt +; CHECK-COMMON: fcvt +; CHECK-COMMON: mov +; CHECK-COMMON: ins +; CHECK-COMMON: ins +; CHECK-COMMON: fcvt +; CHECK-COMMON: ins +; CHECK-COMMON: ret %1 = fptrunc <4 x double> %a to <4 x half> ret <4 x half> %1 } define <4 x float> @h_to_s(<4 x half> %a) { -; CHECK-LABEL: h_to_s: -; CHECK: fcvtl v0.4s, v0.4h +; CHECK-COMMON-LABEL: h_to_s: +; CHECK-COMMON: fcvtl v0.4s, v0.4h +; CHECK-COMMON-NEXT: ret %1 = fpext <4 x half> %a to <4 x float> ret <4 x float> %1 } define <4 x double> @h_to_d(<4 x half> %a) { -; CHECK-LABEL: h_to_d: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: ins -; CHECK-DAG: ins -; CHECK-DAG: ins -; CHECK-DAG: ins +; CHECK-COMMON-LABEL: h_to_d: +; CHECK-COMMON: mov +; CHECK-COMMON: fcvt +; CHECK-COMMON: mov +; CHECK-COMMON: mov +; 
CHECK-COMMON: fcvt +; CHECK-COMMON: fcvt +; CHECK-COMMON: fcvt +; CHECK-COMMON: ins +; CHECK-COMMON: ins +; CHECK-COMMON: mov +; CHECK-COMMON: ret %1 = fpext <4 x half> %a to <4 x double> ret <4 x double> %1 } define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) { -; CHECK-LABEL: bitcast_i_to_h: -; CHECK: mov v0.16b, v1.16b +; CHECK-COMMON-LABEL: bitcast_i_to_h: +; CHECK-COMMON: mov v0.16b, v1.16b +; CHECK-COMMON-NEXT: ret %2 = bitcast <4 x i16> %a to <4 x half> ret <4 x half> %2 } define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) { -; CHECK-LABEL: bitcast_h_to_i: -; CHECK: mov v0.16b, v1.16b +; CHECK-COMMON-LABEL: bitcast_h_to_i: +; CHECK-COMMON: mov v0.16b, v1.16b +; CHECK-COMMON-NEXT: ret %2 = bitcast <4 x half> %a to <4 x i16> ret <4 x i16> %2 } define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { -; CHECK-LABEL: sitofp_i8: -; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8 -; CHECK-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8 -; CHECK-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0 -; CHECK-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]] -; CHECK-NEXT: fcvtn v0.4h, [[OP4]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: sitofp_i8: +; CHECK-COMMON-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8 +; CHECK-COMMON-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8 +; CHECK-COMMON-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0 +; CHECK-COMMON-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]] +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP4]] +; CHECK-COMMON-NEXT: ret %1 = sitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @sitofp_i16(<4 x i16> %a) #0 { -; CHECK-LABEL: sitofp_i16: -; CHECK-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]] -; CHECK-NEXT: fcvtn v0.4h, [[OP2]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: sitofp_i16: +; CHECK-COMMON-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-COMMON-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]] +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] +; CHECK-COMMON-NEXT: ret %1 = sitofp <4 x i16> %a to <4 x half> ret <4 
x half> %1 } define <4 x half> @sitofp_i32(<4 x i32> %a) #0 { -; CHECK-LABEL: sitofp_i32: -; CHECK-NEXT: scvtf [[OP1:v[0-9]+\.4s]], v0.4s -; CHECK-NEXT: fcvtn v0.4h, [[OP1]] +; CHECK-COMMON-LABEL: sitofp_i32: +; CHECK-COMMON-NEXT: scvtf [[OP1:v[0-9]+\.4s]], v0.4s +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP1]] +; CHECK-COMMON-NEXT: ret %1 = sitofp <4 x i32> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @sitofp_i64(<4 x i64> %a) #0 { -; CHECK-LABEL: sitofp_i64: -; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d -; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d -; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]] -; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]] -; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s +; CHECK-COMMON-LABEL: sitofp_i64: +; CHECK-COMMON-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d +; CHECK-COMMON-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d +; CHECK-COMMON-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]] +; CHECK-COMMON-NEXT: fcvtn2 [[OP3]].4s, [[OP2]] +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP3]].4s +; CHECK-COMMON-NEXT: ret %1 = sitofp <4 x i64> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @uitofp_i8(<4 x i8> %a) #0 { -; CHECK-LABEL: uitofp_i8: -; CHECK-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] -; CHECK-NEXT: fcvtn v0.4h, [[OP2]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: uitofp_i8: +; CHECK-COMMON-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] +; CHECK-COMMON-NEXT: ret %1 = uitofp <4 x i8> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @uitofp_i16(<4 x i16> %a) #0 { -; CHECK-LABEL: uitofp_i16: -; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 -; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] -; CHECK-NEXT: fcvtn v0.4h, [[OP2]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: uitofp_i16: +; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; 
CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] +; CHECK-COMMON-NEXT: ret %1 = uitofp <4 x i16> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @uitofp_i32(<4 x i32> %a) #0 { -; CHECK-LABEL: uitofp_i32: -; CHECK-NEXT: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s -; CHECK-NEXT: fcvtn v0.4h, [[OP1]] +; CHECK-COMMON-LABEL: uitofp_i32: +; CHECK-COMMON-NEXT: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP1]] +; CHECK-COMMON-NEXT: ret %1 = uitofp <4 x i32> %a to <4 x half> ret <4 x half> %1 } define <4 x half> @uitofp_i64(<4 x i64> %a) #0 { -; CHECK-LABEL: uitofp_i64: -; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d -; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d -; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]] -; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]] -; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s +; CHECK-COMMON-LABEL: uitofp_i64: +; CHECK-COMMON-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d +; CHECK-COMMON-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d +; CHECK-COMMON-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]] +; CHECK-COMMON-NEXT: fcvtn2 [[OP3]].4s, [[OP2]] +; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP3]].4s +; CHECK-COMMON-NEXT: ret %1 = uitofp <4 x i64> %a to <4 x half> ret <4 x half> %1 } define void @test_insert_at_zero(half %a, <4 x half>* %b) #0 { -; CHECK-LABEL: test_insert_at_zero: -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: test_insert_at_zero: +; CHECK-COMMON-NEXT: str d0, [x0] +; CHECK-COMMON-NEXT: ret %1 = insertelement <4 x half> undef, half %a, i64 0 store <4 x half> %1, <4 x half>* %b, align 4 ret void } define <4 x i8> @fptosi_i8(<4 x half> %a) #0 { -; CHECK-LABEL: fptosi_i8: -; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h -; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] -; CHECK-NEXT: xtn v0.4h, [[REG2]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: fptosi_i8: +; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; 
CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]] +; CHECK-COMMON-NEXT: ret %1 = fptosi<4 x half> %a to <4 x i8> ret <4 x i8> %1 } define <4 x i16> @fptosi_i16(<4 x half> %a) #0 { -; CHECK-LABEL: fptosi_i16: -; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h -; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] -; CHECK-NEXT: xtn v0.4h, [[REG2]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: fptosi_i16: +; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]] +; CHECK-COMMON-NEXT: ret %1 = fptosi<4 x half> %a to <4 x i16> ret <4 x i16> %1 } define <4 x i8> @fptoui_i8(<4 x half> %a) #0 { -; CHECK-LABEL: fptoui_i8: -; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-COMMON-LABEL: fptoui_i8: +; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h ; NOTE: fcvtzs selected here because the xtn shaves the sign bit -; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] -; CHECK-NEXT: xtn v0.4h, [[REG2]] -; CHECK-NEXT: ret +; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]] +; CHECK-COMMON-NEXT: ret %1 = fptoui<4 x half> %a to <4 x i8> ret <4 x i8> %1 } define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { -; CHECK-LABEL: fptoui_i16: -; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h -; CHECK-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]] -; CHECK-NEXT: xtn v0.4h, [[REG2]] -; CHECK-NEXT: ret +; CHECK-COMMON-LABEL: fptoui_i16: +; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-COMMON-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]] +; CHECK-COMMON-NEXT: ret %1 = fptoui<4 x half> %a to <4 x i16> ret <4 x i16> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_une: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, ne -; CHECK-DAG: csetm {{.*}}, ne -; 
CHECK-DAG: csetm {{.*}}, ne -; CHECK-DAG: csetm {{.*}}, ne define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_une: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, ne +; CHECK-CVT-DAG: csetm {{.*}}, ne +; CHECK-CVT-DAG: csetm {{.*}}, ne +; CHECK-CVT-DAG: csetm {{.*}}, ne + +; CHECK-FP16-LABEL: test_fcmp_une: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ne +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ne +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ne +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ne +; CHECK-FP16: ret %1 = fcmp une <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ueq: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm [[REG1:w[0-9]+]], eq -; CHECK-DAG: csetm [[REG2:w[0-9]+]], eq -; CHECK-DAG: csetm [[REG3:w[0-9]+]], eq -; CHECK-DAG: csetm [[REG4:w[0-9]+]], eq -; CHECK-DAG: csinv {{.*}}, [[REG1]], wzr, vc -; CHECK-DAG: csinv {{.*}}, [[REG2]], wzr, vc -; CHECK-DAG: csinv {{.*}}, [[REG3]], wzr, vc -; CHECK-DAG: csinv {{.*}}, [[REG4]], wzr, vc define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ueq: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], eq +; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], eq +; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], eq +; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], eq +; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, vc +; CHECK-CVT-DAG: csinv {{.*}}, 
[[REG2]], wzr, vc +; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, vc +; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, vc + +; CHECK-FP16-LABEL: test_fcmp_ueq: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: ret %1 = fcmp ueq <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ugt: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, hi -; CHECK-DAG: csetm {{.*}}, hi -; CHECK-DAG: csetm {{.*}}, hi -; CHECK-DAG: csetm {{.*}}, hi define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ugt: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, hi +; CHECK-CVT-DAG: csetm {{.*}}, hi +; CHECK-CVT-DAG: csetm {{.*}}, hi +; CHECK-CVT-DAG: csetm {{.*}}, hi + +; CHECK-FP16-LABEL: test_fcmp_ugt: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, hi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, hi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, hi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, hi +; CHECK-FP16: ret %1 = fcmp ugt <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_uge: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, pl -; CHECK-DAG: csetm {{.*}}, pl -; CHECK-DAG: csetm {{.*}}, pl 
-; CHECK-DAG: csetm {{.*}}, pl define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_uge: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, pl +; CHECK-CVT-DAG: csetm {{.*}}, pl +; CHECK-CVT-DAG: csetm {{.*}}, pl +; CHECK-CVT-DAG: csetm {{.*}}, pl + +; CHECK-FP16-LABEL: test_fcmp_uge: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, pl +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, pl +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, pl +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, pl +; CHECK-FP16: ret %1 = fcmp uge <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ult: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, lt -; CHECK-DAG: csetm {{.*}}, lt -; CHECK-DAG: csetm {{.*}}, lt -; CHECK-DAG: csetm {{.*}}, lt define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ult: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, lt +; CHECK-CVT-DAG: csetm {{.*}}, lt +; CHECK-CVT-DAG: csetm {{.*}}, lt +; CHECK-CVT-DAG: csetm {{.*}}, lt + +; CHECK-FP16-LABEL: test_fcmp_ult: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, lt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, lt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, lt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, lt +; CHECK-FP16: ret %1 = fcmp ult <4 x half> 
%a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ule: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, le -; CHECK-DAG: csetm {{.*}}, le -; CHECK-DAG: csetm {{.*}}, le -; CHECK-DAG: csetm {{.*}}, le define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ule: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, le +; CHECK-CVT-DAG: csetm {{.*}}, le +; CHECK-CVT-DAG: csetm {{.*}}, le +; CHECK-CVT-DAG: csetm {{.*}}, le + +; CHECK-FP16-LABEL: test_fcmp_ule: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, le +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, le +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, le +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, le +; CHECK-FP16: ret %1 = fcmp ule <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_uno: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, vs -; CHECK-DAG: csetm {{.*}}, vs -; CHECK-DAG: csetm {{.*}}, vs -; CHECK-DAG: csetm {{.*}}, vs define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_uno: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, vs +; CHECK-CVT-DAG: csetm {{.*}}, vs +; CHECK-CVT-DAG: csetm {{.*}}, vs +; CHECK-CVT-DAG: csetm {{.*}}, vs + +; CHECK-FP16-LABEL: test_fcmp_uno: 
+; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vs +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vs +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vs +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vs +; CHECK-FP16: ret %1 = fcmp uno <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_one: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm [[REG1:w[0-9]+]], mi -; CHECK-DAG: csetm [[REG2:w[0-9]+]], mi -; CHECK-DAG: csetm [[REG3:w[0-9]+]], mi -; CHECK-DAG: csetm [[REG4:w[0-9]+]], mi -; CHECK-DAG: csinv {{.*}}, [[REG1]], wzr, le -; CHECK-DAG: csinv {{.*}}, [[REG2]], wzr, le -; CHECK-DAG: csinv {{.*}}, [[REG3]], wzr, le -; CHECK-DAG: csinv {{.*}}, [[REG4]], wzr, le - define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_one: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], mi +; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], mi +; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], mi +; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], mi +; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, le +; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, le +; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, le +; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, le + +; CHECK-FP16-LABEL: test_fcmp_one: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: ret %1 = fcmp one <4 x half> %a, %b ret <4 x i1> %1 } -; 
Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_oeq: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, eq -; CHECK-DAG: csetm {{.*}}, eq -; CHECK-DAG: csetm {{.*}}, eq -; CHECK-DAG: csetm {{.*}}, eq define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_oeq: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, eq +; CHECK-CVT-DAG: csetm {{.*}}, eq +; CHECK-CVT-DAG: csetm {{.*}}, eq +; CHECK-CVT-DAG: csetm {{.*}}, eq + +; CHECK-FP16-LABEL: test_fcmp_oeq: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, eq +; CHECK-FP16: ret %1 = fcmp oeq <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ogt: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, gt -; CHECK-DAG: csetm {{.*}}, gt -; CHECK-DAG: csetm {{.*}}, gt -; CHECK-DAG: csetm {{.*}}, gt define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ogt: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, gt +; CHECK-CVT-DAG: csetm {{.*}}, gt +; CHECK-CVT-DAG: csetm {{.*}}, gt +; CHECK-CVT-DAG: csetm {{.*}}, gt + +; CHECK-FP16-LABEL: test_fcmp_ogt: +; CHECK-FP16-NOT: fcvt +; 
CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, gt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, gt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, gt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, gt +; CHECK-FP16: ret %1 = fcmp ogt <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_oge: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, ge -; CHECK-DAG: csetm {{.*}}, ge -; CHECK-DAG: csetm {{.*}}, ge -; CHECK-DAG: csetm {{.*}}, ge define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_oge: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, ge +; CHECK-CVT-DAG: csetm {{.*}}, ge +; CHECK-CVT-DAG: csetm {{.*}}, ge +; CHECK-CVT-DAG: csetm {{.*}}, ge + +; CHECK-FP16-LABEL: test_fcmp_oge: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ge +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ge +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ge +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ge +; CHECK-FP16: ret %1 = fcmp oge <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_olt: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, mi -; CHECK-DAG: csetm {{.*}}, mi -; CHECK-DAG: csetm {{.*}}, mi -; CHECK-DAG: csetm {{.*}}, mi define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_olt: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; 
CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, mi +; CHECK-CVT-DAG: csetm {{.*}}, mi +; CHECK-CVT-DAG: csetm {{.*}}, mi +; CHECK-CVT-DAG: csetm {{.*}}, mi + +; CHECK-FP16-LABEL: test_fcmp_olt: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, mi +; CHECK-FP16: ret %1 = fcmp olt <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ole: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, ls -; CHECK-DAG: csetm {{.*}}, ls -; CHECK-DAG: csetm {{.*}}, ls -; CHECK-DAG: csetm {{.*}}, ls define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ole: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, ls +; CHECK-CVT-DAG: csetm {{.*}}, ls +; CHECK-CVT-DAG: csetm {{.*}}, ls +; CHECK-CVT-DAG: csetm {{.*}}, ls + +; CHECK-FP16-LABEL: test_fcmp_ole: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ls +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ls +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ls +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, ls +; CHECK-FP16: ret %1 = fcmp ole <4 x half> %a, %b ret <4 x i1> %1 } -; Function Attrs: nounwind readnone -; CHECK-LABEL: test_fcmp_ord: -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: 
fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: fcvt -; CHECK-DAG: csetm {{.*}}, vc -; CHECK-DAG: csetm {{.*}}, vc -; CHECK-DAG: csetm {{.*}}, vc -; CHECK-DAG: csetm {{.*}}, vc define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-CVT-LABEL: test_fcmp_ord: +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: fcvt +; CHECK-CVT-DAG: csetm {{.*}}, vc +; CHECK-CVT-DAG: csetm {{.*}}, vc +; CHECK-CVT-DAG: csetm {{.*}}, vc +; CHECK-CVT-DAG: csetm {{.*}}, vc + +; CHECK-FP16-LABEL: test_fcmp_ord: +; CHECK-FP16-NOT: fcvt +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vc +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vc +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vc +; CHECK-FP16: fcmp h{{.}}, h{{.}} +; CHECK-FP16: csetm {{.*}}, vc +; CHECK-FP16: ret %1 = fcmp ord <4 x half> %a, %b ret <4 x i1> %1 }