Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -163,6 +163,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2647,6 +2647,20 @@ Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAX: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + LLT WideVecTy = VecTy.isVector() + ? LLT::vector(VecTy.getElementCount(), WideTy) + : WideTy; + widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; } } Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -847,6 +847,18 @@ .clampMaxNumElements(1, s32, 4) .lower(); + getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX}) + .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}}) + .legalIf([=](const LegalityQuery &Query) { + const auto &Ty = Query.Types[1]; + return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16; + }) + .minScalarOrElt(0, MinFPScalar) + .clampMaxNumElements(1, s64, 2) + .clampMaxNumElements(1, s32, 4) + .clampMaxNumElements(1, s16, 8) + .lower(); + getActionDefinitionsBuilder( {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR}) // Try to break down into smaller vectors as long as they're at least 64 Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s + +--- +name: fmin_v2s32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: fmin_v2s32 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[COPY]](<2 x s32>) + ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMIN]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(s32) = G_VECREDUCE_FMIN %0(<2 x s32>) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: fmax_v8s16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: fmax_v8s16 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXNUM [[FPEXT]], [[FPEXT1]] + ; CHECK-NEXT: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[FMAXNUM]](<4 x s32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[VECREDUCE_FMAX]](s32) + ; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(s16) = G_VECREDUCE_FMAX %0(<8 x s16>) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -712,11 +712,12 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected Index: llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll @@ -1,6 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128 +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128 declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) @@ -30,11 +44,29 @@ } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) ret float %b } @@ -56,195 +88,231 @@ } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnmv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b } define half @test_v4f16_ninf(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16_ninf: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16_ninf: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnmv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16_ninf: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a) ret half %b } define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fmaxnm s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fmaxnm s2, s3, s2 -; CHECK-NOFP-NEXT: mov h3, v1.h[2] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmaxnm s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fmaxnm s2, s4, s2 -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmaxnm s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[4] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmaxnm s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s4 -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmaxnm s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[6] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmaxnm s3, s5, s3 -; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s4 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s4, s2 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s4 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[6] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s4 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %a) ret half %b } @@ -278,13 +346,37 @@ } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmaxnm v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxnmv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fmaxnm v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fmaxnm v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fmaxnm v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fmaxnm v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmaxnm v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) ret float %b } Index: llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -1,6 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128 +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128 declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) @@ -30,11 +44,29 @@ } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) ret float %b } @@ -56,203 +88,239 @@ } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnmv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b } define half @test_v4f16_ninf(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16_ninf: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16_ninf: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnmv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16_ninf: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a) ret half %b } define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s3, s2 -; CHECK-NOFP-NEXT: fcsel s2, s3, s2, gt -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, gt -; CHECK-NOFP-NEXT: mov h4, v1.h[2] -; CHECK-NOFP-NEXT: mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fmaxnm s2, s3, s2 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, gt -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, gt -; CHECK-NOFP-NEXT: mov h4, v1.h[4] -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, gt -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, gt -; CHECK-NOFP-NEXT: mov h4, v1.h[6] -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, gt -; CHECK-NOFP-NEXT: fcmp s0, s1 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcsel s0, s0, s1, gt -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fmaxnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmaxnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s3, s2 +; CHECK-NOFP-SD-NEXT: fcsel s2, s3, s2, gt +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[2] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt +; CHECK-NOFP-SD-NEXT: fcmp s0, s1 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmax.v16f16(<16 x half> %a) ret half %b } @@ -286,13 +354,37 @@ } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmaxnm v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxnmv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fmaxnm v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fmaxnm v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fmaxnm v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fmaxnm v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmaxnm v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmaxnmv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) ret float %b } Index: llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -1,6 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128 +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128 declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a) @@ -30,11 +44,29 @@ } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a) ret float %b } @@ -56,203 +88,239 @@ } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminnmv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fminnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %b } define half @test_v4f16_ninf(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16_ninf: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16_ninf: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminnmv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16_ninf: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fminnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminnmv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fminnm v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fminnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a) ret half %b } define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s3, s2 -; CHECK-NOFP-NEXT: fcsel s2, s3, s2, lt -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, lt -; CHECK-NOFP-NEXT: mov h4, v1.h[2] -; CHECK-NOFP-NEXT: mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fminnm s2, s3, s2 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, lt -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fminnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, lt -; CHECK-NOFP-NEXT: mov h4, v1.h[4] -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fminnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, lt -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fminnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, lt -; CHECK-NOFP-NEXT: mov h4, v1.h[6] -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcmp s5, s4 -; CHECK-NOFP-NEXT: fminnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcsel s3, s5, s4, lt -; CHECK-NOFP-NEXT: fcmp s0, s1 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcsel s0, s0, s1, lt -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fminnm s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fminnm s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s3, s2 +; CHECK-NOFP-SD-NEXT: fcsel s2, s3, s2, lt +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[2] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fminnm s2, s3, s2 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6] +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcmp s5, s4 +; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt +; CHECK-NOFP-SD-NEXT: fcmp s0, s1 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, lt +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fminnm s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NOFP-GI-NEXT: fminnm v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fminnm v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmin.v16f16(<16 x half> %a) ret half %b } @@ -286,13 +354,37 @@ } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fminnm v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fminnm v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fminnmv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fminnm v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fminnm v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fminnmv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fminnm v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fminnm v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fminnmv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminnm v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminnmv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fminnm v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fminnm v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fminnmv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) ret float %b }