Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_lane.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_lane.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_lane.ll @@ -0,0 +1,301 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) +declare half @llvm.fma.f16(half, half, half) #1 + +define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfma_lane_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer + %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a) + ret <4 x half> %fmla3 +} + +define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmaq_lane_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer + %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a) + ret <8 x half> %fmla3 +} + +define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfma_laneq_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v1.4h, v2.4h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a) + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmaq_laneq_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v1.8h, v2.8h +; CHECK-NEXT: ret +entry: + %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a) + ret <8 x half> %0 +} + +define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) { +; CHECK-LABEL: t_vfma_n_f16: +; CHECK: dup v2.4h, v2.h[0] +; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <4 x half> undef, half %c, i32 0 + %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4 + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) { +; CHECK-LABEL: t_vfmaq_n_f16: +; CHECK: dup v2.8h, v2.h[0] +; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <8 x half> undef, half %c, i32 0 + %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4 + ret <8 x half> %0 +} + +define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmah_lane_f16: +; 
CHECK: fmadd h0, h1, h2, h0
+; CHECK-NEXT: ret
+entry:
+  %extract = extractelement <4 x half> %c, i32 0
+  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
+  ret half %0
+}
+
+define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
+; CHECK-LABEL: t_vfmah_laneq_f16:
+; CHECK: fmadd h0, h1, h2, h0
+; CHECK-NEXT: ret
+entry:
+  %extract = extractelement <8 x half> %c, i32 0
+  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
+  ret half %0
+}
+
+define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
+; CHECK-LABEL: t_vfms_lane_f16:
+; CHECK: fneg v1.4h, v1.4h
+; CHECK-NEXT: dup v2.4h, v2.h[0]
+; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
+  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a)
+  ret <4 x half> %fmla3
+}
+
+define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
+; CHECK-LABEL: t_vfmsq_lane_f16:
+; CHECK: fneg v1.8h, v1.8h
+; CHECK-NEXT: dup v2.8h, v2.h[0]
+; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
+  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a)
+  ret <8 x half> %fmla3
+}
+
+define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
+; CHECK-LABEL: t_vfms_laneq_f16:
+; CHECK: dup v2.4h, v2.h[0]
+; CHECK-NEXT: fmls v0.4h, v2.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
+  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a)
+  ret <4 x half> %0
+}
+
+define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
+; CHECK-LABEL: t_vfmsq_laneq_f16:
+; CHECK: dup v2.8h, v2.h[0]
+; CHECK-NEXT: fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
+  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a)
+  ret <8 x half> %0
+}
+
+define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
+; CHECK-LABEL: t_vfms_n_f16:
+; CHECK: fneg v1.4h, v1.4h
+; CHECK-NEXT: dup v2.4h, v2.h[0]
+; CHECK-NEXT: fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT: ret
+entry:
+  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %vecinit = insertelement <4 x half> undef, half %c, i32 0
+  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4
+  ret <4 x half> %0
+}
+
+define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
+; CHECK-LABEL: t_vfmsq_n_f16:
+; CHECK: fneg v1.8h, v1.8h
+; CHECK-NEXT: dup v2.8h, v2.h[0]
+; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: ret
+entry:
+  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %vecinit = insertelement <8 x half> undef, half %c, i32 0
+  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4
+  ret
<8 x half> %0 +} + +define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmsh_lane_f16: +; CHECK: fneg h1, h1 +; CHECK: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %0 = fsub half 0xH8000, %b + %extract = extractelement <4 x half> %c, i32 0 + %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a) + ret half %1 +} + +define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vfmsh_laneq_f16: +; CHECK: fneg h1, h1 +; CHECK-NEXT: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %0 = fsub half 0xH8000, %b + %extract = extractelement <8 x half> %c, i32 0 + %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a) + ret half %1 +} + +define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmul_laneq_f16: +; CHECK: fmul v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer + %mul = fmul <4 x half> %shuffle, %a + ret <4 x half> %mul +} + +define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulq_laneq_f16: +; CHECK: fmul v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %mul = fmul <8 x half> %shuffle, %a + ret <8 x half> %mul +} + +define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vmulh_lane_f16: +; CHECK: fmul h0, h0, v1.h[0] +; CHECK-NEXT: ret +entry: + %0 = extractelement <4 x half> %c, i32 0 + %1 = fmul half %0, %a + ret half %1 +} + +define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) { +; CHECK-LABEL: t_vmulh_laneq_f16: +; CHECK: fmul h0, h0, v1.h[0] +; CHECK-NEXT: ret +entry: + %0 = extractelement <8 x half> %c, i32 0 + %1 = fmul half %0, %a + ret half %1 +} + +define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulx_lane_f16: +; CHECK: fmulx v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4 + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulxq_lane_f16: +; CHECK: fmulx v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4 + ret <8 x half> %vmulx2.i +} + +define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulx_laneq_f16: +; CHECK: fmulx v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4 + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) { +; CHECK-LABEL: t_vmulxq_laneq_f16: +; CHECK: fmulx v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret +entry: + %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, 
<8 x half> %shuffle) #4 + ret <8 x half> %vmulx2.i +} + +define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) { +; CHECK-LABEL: t_vmulx_n_f16: +; CHECK: dup v1.4h, v1.h[0] +; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <4 x half> undef, half %c, i32 0 + %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4 + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) { +; CHECK-LABEL: t_vmulxq_n_f16: +; CHECK: dup v1.8h, v1.h[0] +; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vecinit = insertelement <8 x half> undef, half %c, i32 0 + %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4 + ret <8 x half> %vmulx2.i +} Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll @@ -0,0 +1,318 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtps.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtps.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtns.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtns.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtms.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtms.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtau.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtau.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtas.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtas.i32.f16(half) +declare half @llvm.aarch64.neon.frsqrte.f16(half) +declare half @llvm.aarch64.neon.frecpx.f16(half) +declare half @llvm.aarch64.neon.frecpe.f16(half) + +define dso_local i16 @t2(half %a) { +; CHECK-LABEL: t2: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, eq +; CHECK-NEXT: ret +entry: + %0 = fcmp oeq half %a, 0xH0000 + %vceqz = sext i1 %0 to i16 + ret i16 %vceqz +} + +define dso_local i16 @t3(half %a) { +; CHECK-LABEL: t3: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, ge +; CHECK-NEXT: ret +entry: + %0 = fcmp oge half %a, 0xH0000 + %vcgez = sext i1 %0 to i16 + ret i16 %vcgez +} + +define dso_local i16 @t4(half %a) { +; CHECK-LABEL: t4: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, gt +; CHECK-NEXT: ret +entry: + %0 = fcmp ogt half %a, 0xH0000 + %vcgtz = sext i1 %0 to i16 + ret i16 %vcgtz +} + +define dso_local i16 @t5(half %a) { +; CHECK-LABEL: t5: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, ls +; CHECK-NEXT: ret +entry: + %0 = fcmp ole half %a, 0xH0000 + %vclez = sext i1 %0 to i16 + ret i16 %vclez +} + +define dso_local i16 @t6(half %a) { +; CHECK-LABEL: t6: +; CHECK: fcmp h0, #0.0 +; CHECK-NEXT: csetm w0, mi +; CHECK-NEXT: ret +entry: + %0 = fcmp olt half %a, 0xH0000 + %vcltz = sext i1 %0 to i16 + ret i16 %vcltz +} + +define dso_local half @t8(i32 %a) { +; CHECK-LABEL: 
t8: +; CHECK: scvtf h0, w0 +; CHECK-NEXT: ret +entry: + %0 = sitofp i32 %a to half + ret half %0 +} + +define dso_local half @t9(i64 %a) { +; CHECK-LABEL: t9: +; CHECK: scvtf h0, x0 +; CHECK-NEXT: ret +entry: + %0 = sitofp i64 %a to half + ret half %0 +} + +define dso_local half @t12(i64 %a) { +; CHECK-LABEL: t12: +; CHECK: ucvtf h0, x0 +; CHECK-NEXT: ret +entry: + %0 = uitofp i64 %a to half + ret half %0 +} + +define dso_local i16 @t13(half %a) { +; CHECK-LABEL: t13: +; CHECK: fcvtzs w0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptosi half %a to i16 + ret i16 %0 +} + +define dso_local i64 @t15(half %a) { +; CHECK-LABEL: t15: +; CHECK: fcvtzs x0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptosi half %a to i64 + ret i64 %0 +} + +define dso_local i16 @t16(half %a) { +; CHECK-LABEL: t16: +; CHECK: fcvtzs w0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptoui half %a to i16 + ret i16 %0 +} + +define dso_local i64 @t18(half %a) { +; CHECK-LABEL: t18: +; CHECK: fcvtzu x0, h0 +; CHECK-NEXT: ret +entry: + %0 = fptoui half %a to i64 + ret i64 %0 +} + +define dso_local i16 @t19(half %a) { +; CHECK-LABEL: t19: +; CHECK: fcvtas w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t21(half %a) { +; CHECK-LABEL: t21: +; CHECK: fcvtas x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a) + ret i64 %vcvtah_s64_f16 +} + +define dso_local i16 @t22(half %a) { +; CHECK-LABEL: t22: +; CHECK: fcvtau w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t24(half %a) { +; CHECK-LABEL: t24: +; CHECK: fcvtau x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtah_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a) + ret i64 %vcvtah_u64_f16 +} + +define dso_local i16 @t25(half %a) { +; CHECK-LABEL: t25: +; CHECK: fcvtms w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t27(half %a) { +; CHECK-LABEL: t27: +; CHECK: fcvtms x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtmh_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a) + ret i64 %vcvtmh_s64_f16 +} + +define dso_local i16 @t28(half %a) { +; CHECK-LABEL: t28: +; CHECK: fcvtmu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t30(half %a) { +; CHECK-LABEL: t30: +; CHECK: fcvtmu x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtmh_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a) + ret i64 %vcvtmh_u64_f16 +} + +define dso_local i16 @t31(half %a) { +; CHECK-LABEL: t31: +; CHECK: fcvtns w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t33(half %a) { +; CHECK-LABEL: t33: +; CHECK: fcvtns x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtnh_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a) + ret i64 %vcvtnh_s64_f16 +} + +define dso_local i16 @t34(half %a) { +; CHECK-LABEL: t34: +; CHECK: fcvtnu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t36(half %a) { +; CHECK-LABEL: t36: +; CHECK: 
fcvtnu x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtnh_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a) + ret i64 %vcvtnh_u64_f16 +} + +define dso_local i16 @t37(half %a) { +; CHECK-LABEL: t37: +; CHECK: fcvtps w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t39(half %a) { +; CHECK-LABEL: t39: +; CHECK: fcvtps x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtph_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a) + ret i64 %vcvtph_s64_f16 +} + +define dso_local i16 @t40(half %a) { +; CHECK-LABEL: t40: +; CHECK: fcvtpu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a) + %0 = trunc i32 %fcvt to i16 + ret i16 %0 +} + +define dso_local i64 @t42(half %a) { +; CHECK-LABEL: t42: +; CHECK: fcvtpu x0, h0 +; CHECK-NEXT: ret +entry: + %vcvtph_u64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a) + ret i64 %vcvtph_u64_f16 +} + +define dso_local half @t44(half %a) { +; CHECK-LABEL: t44: +; CHECK: frecpe h0, h0 +; CHECK-NEXT: ret +entry: + %vrecpeh_f16 = tail call half @llvm.aarch64.neon.frecpe.f16(half %a) + ret half %vrecpeh_f16 +} + +define dso_local half @t45(half %a) { +; CHECK-LABEL: t45: +; CHECK: frecpx h0, h0 +; CHECK-NEXT: ret +entry: + %vrecpxh_f16 = tail call half @llvm.aarch64.neon.frecpx.f16(half %a) + ret half %vrecpxh_f16 +} + +define dso_local half @t53(half %a) { +; CHECK-LABEL: t53: +; CHECK: frsqrte h0, h0 +; CHECK-NEXT: ret +entry: + %vrsqrteh_f16 = tail call half @llvm.aarch64.neon.frsqrte.f16(half %a) + ret half %vrsqrteh_f16 +} Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll @@ -0,0 +1,117 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare half @llvm.aarch64.sisd.fabd.f16(half, half) +declare half @llvm.aarch64.neon.fmax.f16(half, half) +declare half @llvm.aarch64.neon.fmin.f16(half, half) +declare half @llvm.aarch64.neon.frsqrts.f16(half, half) +declare half @llvm.aarch64.neon.frecps.f16(half, half) +declare half @llvm.aarch64.neon.fmulx.f16(half, half) + +define dso_local half @t_vabdh_f16(half %a, half %b) { +; CHECK-LABEL: t_vabdh_f16: +; CHECK: fabd h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vabdh_f16 = tail call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b) + ret half %vabdh_f16 +} + +define dso_local i16 @t_vceqh_f16(half %a, half %b) { +; CHECK-LABEL: t_vceqh_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, eq +; CHECK-NEXT: ret +entry: + %0 = fcmp oeq half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vcgeh_f16(half %a, half %b) { +; CHECK-LABEL: t_vcgeh_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, ge +; CHECK-NEXT: ret +entry: + %0 = fcmp oge half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vcgth_f16(half %a, half %b) { +; CHECK-LABEL: t_vcgth_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, gt +; CHECK-NEXT: ret +entry: + %0 = fcmp ogt half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local i16 @t_vcleh_f16(half %a, half %b) { +; CHECK-LABEL: t_vcleh_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, ls +; CHECK-NEXT: ret +entry: + %0 = fcmp ole half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 
%vcmpd +} + +define dso_local i16 @t_vclth_f16(half %a, half %b) { +; CHECK-LABEL: t_vclth_f16: +; CHECK: fcmp h0, h1 +; CHECK-NEXT: csetm w0, mi +; CHECK-NEXT: ret +entry: + %0 = fcmp olt half %a, %b + %vcmpd = sext i1 %0 to i16 + ret i16 %vcmpd +} + +define dso_local half @t_vmaxh_f16(half %a, half %b) { +; CHECK-LABEL: t_vmaxh_f16: +; CHECK: fmax h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vmax = tail call half @llvm.aarch64.neon.fmax.f16(half %a, half %b) + ret half %vmax +} + +define dso_local half @t_vminh_f16(half %a, half %b) { +; CHECK-LABEL: t_vminh_f16: +; CHECK: fmin h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vmin = tail call half @llvm.aarch64.neon.fmin.f16(half %a, half %b) + ret half %vmin +} + +define dso_local half @t_vmulxh_f16(half %a, half %b) { +; CHECK-LABEL: t_vmulxh_f16: +; CHECK: fmulx h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vmulxh_f16 = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b) + ret half %vmulxh_f16 +} + +define dso_local half @t_vrecpsh_f16(half %a, half %b) { +; CHECK-LABEL: t_vrecpsh_f16: +; CHECK: frecps h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vrecps = tail call half @llvm.aarch64.neon.frecps.f16(half %a, half %b) + ret half %vrecps +} + +define dso_local half @t_vrsqrtsh_f16(half %a, half %b) { +; CHECK-LABEL: t_vrsqrtsh_f16: +; CHECK: frsqrts h0, h0, h1 +; CHECK-NEXT: ret +entry: + %vrsqrtsh_f16 = tail call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b) + ret half %vrsqrtsh_f16 +} Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare half @llvm.fma.f16(half, half, half) + +define dso_local half @t_vfmah_f16(half %a, half %b, half %c) { +; CHECK-LABEL: t_vfmah_f16: +; CHECK: fmadd h0, h1, h2, h0 +; CHECK-NEXT: ret +entry: + %0 = tail call half @llvm.fma.f16(half %b, half %c, half %a) + ret half %0 +} + Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_1op.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_1op.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_1op.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>) +declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) +declare <4 x half> @llvm.sqrt.v4f16(<4 x half>) +declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) + +define dso_local <4 x half> @t_vrndi_f16(<4 x half> %a) { +; CHECK-LABEL: t_vrndi_f16: +; CHECK: frinti v0.4h, v0.4h +; CHECK-NEXT: ret +entry: + %vrndi1.i = tail call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %a) + ret <4 x half> %vrndi1.i +} + +define dso_local <8 x half> @t_vrndiq_f16(<8 x half> %a) { +; CHECK-LABEL: t_vrndiq_f16: +; CHECK: frinti v0.8h, v0.8h +; CHECK-NEXT: ret +entry: + %vrndi1.i = tail call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a) + ret <8 x half> %vrndi1.i +} + +define dso_local <4 x half> @t_vsqrt_f16(<4 x half> %a) { +; CHECK-LABEL: t_vsqrt_f16: +; CHECK: fsqrt v0.4h, v0.4h +; CHECK-NEXT: ret +entry: + %vsqrt.i = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a) + ret <4 x half> %vsqrt.i +} + +define dso_local <8 x half> @t_vsqrtq_f16(<8 x half> %a) { +; CHECK-LABEL: t_vsqrtq_f16: +; CHECK: fsqrt v0.8h, v0.8h +; CHECK-NEXT: 
ret +entry: + %vsqrt.i = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) + ret <8 x half> %vsqrt.i +} Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_2op.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_2op.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_2op.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s + +declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half>, <8 x half>) +declare <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half>, <8 x half>) + +define dso_local <4 x half> @t_vdiv_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vdiv_f16: +; CHECK: fdiv v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %div.i = fdiv <4 x half> %a, %b + ret <4 x half> %div.i +} + +define dso_local <8 x half> @t_vdivq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vdivq_f16: +; CHECK: fdiv v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %div.i = fdiv <8 x half> %a, %b + ret <8 x half> %div.i +} + +define dso_local <4 x half> @t_vmulx_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vmulx_f16: +; CHECK: fmulx v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vmulx2.i +} + +define dso_local <8 x half> @t_vmulxq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vmulxq_f16: +; CHECK: fmulx v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vmulx2.i +} + +define dso_local <4 x half> @t_vpminnm_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vpminnm_f16: +; CHECK: fminnmp v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vpminnm2.i = tail call <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vpminnm2.i +} + +define dso_local <8 x half> @t_vpminnmq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vpminnmq_f16: +; CHECK: fminnmp v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vpminnm2.i = tail call <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vpminnm2.i +} + +define dso_local <4 x half> @t_vpmaxnm_f16(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: t_vpmaxnm_f16: +; CHECK: fmaxnmp v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret +entry: + %vpmaxnm2.i = tail call <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %vpmaxnm2.i +} + +define dso_local <8 x half> @t_vpmaxnmq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: t_vpmaxnmq_f16: +; CHECK: fmaxnmp v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret +entry: + %vpmaxnm2.i = tail call <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %vpmaxnm2.i +} Index: llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_3op.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_3op.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16_intrinsic_vector_3op.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | 
FileCheck %s + +declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) + +define dso_local <4 x half> @t_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: t_vfma_f16: +; CHECK: fmla v0.4h, v2.4h, v1.4h +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) + ret <4 x half> %0 +} + +define dso_local <8 x half> @t_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: t_vfmaq_f16: +; CHECK: fmla v0.8h, v2.8h, v1.8h +; CHECK-NEXT: ret +entry: + %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a) + ret <8 x half> %0 +}
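
Note on intent: these tests pin down the AArch64 instruction selection for the ARMv8.2-A FP16 intrinsics at the IR level. For reference, a minimal ACLE-level sketch of the kind of user code that produces this IR, assuming a Clang built with -march=armv8.2-a+fp16 and FP16 support in <arm_neon.h>; the function names below are illustrative only and are not part of this patch:

    #include <arm_neon.h>

    /* Lane-indexed FMA: Clang lowers this to a splat shufflevector feeding
       llvm.fma.v4f16, the exact pattern t_vfma_lane_f16 checks above,
       which should select FMLA (vector, by element). */
    float16x4_t fma_by_lane(float16x4_t acc, float16x4_t x, float16x4_t y) {
      return vfma_lane_f16(acc, x, y, 0);
    }

    /* Scalar FMULX: lowers to llvm.aarch64.neon.fmulx.f16, as checked by
       t_vmulxh_f16 in fp16_intrinsic_scalar_2op.ll. */
    float16_t scalar_mulx(float16_t a, float16_t b) {
      return vmulxh_f16(a, b);
    }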