Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4950,6 +4950,18 @@ Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FPTRUNC: { /* Widen a vector G_FPTRUNC to MoreTy; only a request on the result type (TypeIdx 0) is handled, anything else is reported as UnableToLegalize. */ + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + LLT SrcTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(1).getReg()).getElementType()); /* The source keeps its own element type but takes MoreTy's element count, so source and result have matching lane counts. */ + moreElementsVectorSrc(MI, SrcTy, 1); /* operand 1 is the source; it is widened to SrcTy */ + moreElementsVectorDst(MI, MoreTy, 0); /* operand 0 is the result; it is widened to MoreTy */ + Observer.changedInstr(MI); + return Legalized; + } default: return UnableToLegalize; } Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -534,7 +534,10 @@ getActionDefinitionsBuilder(G_FPTRUNC) .legalFor( {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) - .clampMaxNumElements(0, s32, 2); + .clampNumElements(0, v4s16, v4s16) /* min and max are both v4s16: s16-element results are clamped to exactly 4 lanes, matching the legal {v4s16, v4s32} entry. */ + .clampNumElements(0, v2s32, v2s32) /* min and max are both v2s32: s32-element results are clamped to exactly 2 lanes, matching the legal {v2s32, v2s64} entry. */ + .scalarize(0); /* Fallback for whatever the clamps still leave non-legal. NOTE(review): presumably this is what produces the per-lane fcvt sequence for f64-to-f16 vectors in fptrunc.ll; confirm. */ + getActionDefinitionsBuilder(G_FPEXT) .legalFor( {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir @@ -82,8 +82,12 @@ ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<2 x s16>) = G_FPTRUNC [[COPY]](<2 x s32>) - ; CHECK-NEXT: $s0 = COPY [[FPTRUNC]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FPTRUNC]](<4 x s16>) + ; CHECK-NEXT: $s0 = COPY [[UV2]](<2 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %0:_(<2 x s32>) = COPY $d0 %1:_(<2 x s16>) = G_FPTRUNC %0 Index: llvm/test/CodeGen/AArch64/fptrunc.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/fptrunc.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +define float @fptrunc_f64_f32(double %a) { +; CHECK-LABEL: fptrunc_f64_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: ret +entry: + %c = fptrunc double %a to float + ret float %c +} + +define half @fptrunc_f64_f16(double %a) { +; CHECK-LABEL: fptrunc_f64_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: ret +entry: + %c = fptrunc double %a to half + ret half %c +} + +define half @fptrunc_f32_f16(float %a) { +; CHECK-LABEL: fptrunc_f32_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ret +entry: + %c = fptrunc float %a to half + ret half %c +} + +define <2 x float> @fptrunc_v2f64_v2f32(<2 x double> %a) { +; CHECK-LABEL: fptrunc_v2f64_v2f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = fptrunc <2 x double> %a to <2 x float> + ret <2 x float> %c +} + +define <4 x float> @fptrunc_v4f64_v4f32(<4 x double> %a) { +; CHECK-LABEL: fptrunc_v4f64_v4f32: 
+; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtn v0.2s, v0.2d +; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: ret +entry: + %c = fptrunc <4 x double> %a to <4 x float> + ret <4 x float> %c +} + +define <2 x half> @fptrunc_v2f64_v2f16(<2 x double> %a) { +; CHECK-SD-LABEL: fptrunc_v2f64_v2f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov d1, v0.d[1] +; CHECK-SD-NEXT: fcvt h0, d0 +; CHECK-SD-NEXT: fcvt h1, d1 +; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptrunc_v2f64_v2f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fcvt h0, d0 +; CHECK-GI-NEXT: fcvt h1, d1 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v0.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = fptrunc <2 x double> %a to <2 x half> + ret <2 x half> %c +} + +define <4 x half> @fptrunc_v4f64_v4f16(<4 x double> %a) { +; CHECK-SD-LABEL: fptrunc_v4f64_v4f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov d2, v0.d[1] +; CHECK-SD-NEXT: fcvt h0, d0 +; CHECK-SD-NEXT: fcvt h2, d2 +; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] +; CHECK-SD-NEXT: fcvt h2, d1 +; CHECK-SD-NEXT: mov d1, v1.d[1] +; CHECK-SD-NEXT: mov v0.h[2], v2.h[0] +; CHECK-SD-NEXT: fcvt h1, d1 +; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptrunc_v4f64_v4f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: fcvt h0, d0 +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fcvt h1, d1 +; CHECK-GI-NEXT: fcvt h2, d2 +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: fcvt h2, d3 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v2.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = fptrunc <4 x double> 
%a to <4 x half> + ret <4 x half> %c +} + +define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { +; CHECK-SD-LABEL: fptrunc_v2f32_v2f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptrunc_v2f32_v2f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v0.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v0.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = fptrunc <2 x float> %a to <2 x half> + ret <2 x half> %c +} + +define <4 x half> @fptrunc_v4f32_v4f16(<4 x float> %a) { +; CHECK-LABEL: fptrunc_v4f32_v4f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c = fptrunc <4 x float> %a to <4 x half> + ret <4 x half> %c +} + +define <8 x half> @fptrunc_v8f32_v8f16(<8 x float> %a) { +; CHECK-LABEL: fptrunc_v8f32_v8f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn2 v0.8h, v1.4s +; CHECK-NEXT: ret +entry: + %c = fptrunc <8 x float> %a to <8 x half> + ret <8 x half> %c +}