Index: llvm/docs/GlobalISel/GenericOpcode.rst =================================================================== --- llvm/docs/GlobalISel/GenericOpcode.rst +++ llvm/docs/GlobalISel/GenericOpcode.rst @@ -655,10 +655,10 @@ These reductions are relaxed variants which may reduce the elements in any order. -G_VECREDUCE_FMAX, G_VECREDUCE_FMIN +G_VECREDUCE_FMAX, G_VECREDUCE_FMIN, G_VECREDUCE_FMAXIMUM, G_VECREDUCE_FMINIMUM -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. +FMIN/FMAX/FMINIMUM/FMAXIMUM nodes can have flags, for NaN/NoNaN variants. Integer/bitwise reductions Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1973,6 +1973,19 @@ MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src}); } + + /// Build and insert \p Res = G_VECREDUCE_FMAXIMUM \p Src + MachineInstrBuilder buildVecReduceFMaximum(const DstOp &Dst, + const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMAXIMUM, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_FMINIMUM \p Src + MachineInstrBuilder buildVecReduceFMinimum(const DstOp &Dst, + const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMINIMUM, {Dst}, {Src}); + } + /// Build and insert \p Res = G_VECREDUCE_ADD \p Src MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src}); Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -57,6 +57,8 @@ case TargetOpcode::G_VECREDUCE_FMUL: \ case TargetOpcode::G_VECREDUCE_FMAX: \ case 
TargetOpcode::G_VECREDUCE_FMIN: \ + case TargetOpcode::G_VECREDUCE_FMAXIMUM: \ + case TargetOpcode::G_VECREDUCE_FMINIMUM: \ case TargetOpcode::G_VECREDUCE_ADD: \ case TargetOpcode::G_VECREDUCE_MUL: \ case TargetOpcode::G_VECREDUCE_AND: \ @@ -72,6 +74,8 @@ case TargetOpcode::G_VECREDUCE_FMUL: \ case TargetOpcode::G_VECREDUCE_FMAX: \ case TargetOpcode::G_VECREDUCE_FMIN: \ + case TargetOpcode::G_VECREDUCE_FMAXIMUM: \ + case TargetOpcode::G_VECREDUCE_FMINIMUM: \ case TargetOpcode::G_VECREDUCE_ADD: \ case TargetOpcode::G_VECREDUCE_MUL: \ case TargetOpcode::G_VECREDUCE_AND: \ Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -805,6 +805,8 @@ HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL) HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX) HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAXIMUM) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMINIMUM) HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD) HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL) HANDLE_TARGET_OPCODE(G_VECREDUCE_AND) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -1386,6 +1386,8 @@ def G_VECREDUCE_FMAX : VectorReduction; def G_VECREDUCE_FMIN : VectorReduction; +def G_VECREDUCE_FMAXIMUM : VectorReduction; +def G_VECREDUCE_FMINIMUM : VectorReduction; def G_VECREDUCE_ADD : VectorReduction; def G_VECREDUCE_MUL : VectorReduction; Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -165,6 +165,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv<G_VECREDUCE_FMAXIMUM, vecreduce_fmaximum>; +def : GINodeEquiv<G_VECREDUCE_FMINIMUM, vecreduce_fminimum>; def : 
GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1797,6 +1797,10 @@ return TargetOpcode::G_VECREDUCE_FMIN; case Intrinsic::vector_reduce_fmax: return TargetOpcode::G_VECREDUCE_FMAX; + case Intrinsic::vector_reduce_fminimum: + return TargetOpcode::G_VECREDUCE_FMINIMUM; + case Intrinsic::vector_reduce_fmaximum: + return TargetOpcode::G_VECREDUCE_FMAXIMUM; case Intrinsic::vector_reduce_add: return TargetOpcode::G_VECREDUCE_ADD; case Intrinsic::vector_reduce_mul: Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2649,6 +2649,8 @@ } case TargetOpcode::G_VECREDUCE_FMIN: case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); @@ -4453,6 +4455,12 @@ case TargetOpcode::G_VECREDUCE_FMIN: ScalarOpc = TargetOpcode::G_FMINNUM; break; + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + ScalarOpc = TargetOpcode::G_FMAXIMUM; + break; + case TargetOpcode::G_VECREDUCE_FMINIMUM: + ScalarOpc = TargetOpcode::G_FMINIMUM; + break; case TargetOpcode::G_VECREDUCE_ADD: ScalarOpc = TargetOpcode::G_ADD; break; Index: llvm/lib/CodeGen/MachineVerifier.cpp =================================================================== --- llvm/lib/CodeGen/MachineVerifier.cpp +++ llvm/lib/CodeGen/MachineVerifier.cpp @@ -1720,6 +1720,8 @@ case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMINIMUM: case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL: 
case TargetOpcode::G_VECREDUCE_AND: Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -847,7 +847,8 @@ .clampMaxNumElements(1, s32, 4) .lower(); - getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX}) + getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX, + G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM}) .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}}) .legalIf([=](const LegalityQuery &Query) { const auto &Ty = Query.Types[1]; Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -996,6 +996,8 @@ case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMINIMUM: case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL: case TargetOpcode::G_VECREDUCE_AND: Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll @@ -66,6 +66,8 @@ declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>) define float @fmax(<4 x float> %vec) { ; CHECK-LABEL: name: fmax @@ -106,6 +108,45 @@ ret float %res } +define float @fmaximum(<4 x float> %vec) { + ; CHECK-LABEL: name: fmaximum + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: 
liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>) + ; CHECK: $s0 = COPY [[VECREDUCE_FMAX]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec) + ret float %res +} + +define float @fminimum(<4 x float> %vec) { + ; CHECK-LABEL: name: fminimum + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>) + ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec) + ret float %res +} + +define float @fminimum_nnan(<4 x float> %vec) { + ; CHECK-LABEL: name: fminimum_nnan + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>) + ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %res = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec) + ret float %res +} + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) define i32 @add(<4 x i32> %vec) { Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir @@ -46,3 +46,48 @@ RET_ReallyLR implicit $h0 ... 
+--- +name: fminimum_v2s32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: fminimum_v2s32 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[COPY]](<2 x s32>) + ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMINIMUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(s32) = G_VECREDUCE_FMINIMUM %0(<2 x s32>) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: fmaximum_v8s16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: fmaximum_v8s16 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[FPEXT]], [[FPEXT1]] + ; CHECK-NEXT: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[FMAXIMUM]](<4 x s32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[VECREDUCE_FMAXIMUM]](s32) + ; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(s16) = G_VECREDUCE_FMAXIMUM %0(<8 x s16>) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -718,6 +718,14 @@ # DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. 
type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_VECREDUCE_FMAXIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_VECREDUCE_FMINIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected Index: llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir @@ -41,3 +41,24 @@ RET_ReallyLR implicit $w0 ... +--- +name: fmaximum_v4s32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: fmaximum_v4s32 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 + ; CHECK: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:fpr(s32) = G_VECREDUCE_FMAXIMUM [[COPY]](<4 x s32>) + ; CHECK: $w0 = COPY [[VECREDUCE_FMAXIMUM]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(s32) = G_VECREDUCE_FMAXIMUM %0(<4 x s32>) + $w0 = COPY %1(s32) + RET_ReallyLR implicit $w0 + +... 
+ Index: llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf declare half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a) @@ -30,11 +40,29 @@ } define float @test_v1f32(<1 x float> 
%a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a) ret float %b } @@ -56,166 +84,195 @@ } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s2, s1 +; 
CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt 
h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a) ret half %b } 
define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fmax s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fmax s2, s3, s2 -; CHECK-NOFP-NEXT: mov h3, v1.h[2] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fmax s2, s4, s2 -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fmax s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[4] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fmax s2, s2, s4 -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fmax s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[6] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h2, s2 
-; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s3, s5, s3 -; CHECK-NOFP-NEXT: fmax s0, s0, s1 -; CHECK-NOFP-NEXT: fmax s2, s2, s4 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fmax s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fmax s2, s3, s2 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fmax s2, s4, s2 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, 
h2 +; CHECK-NOFP-SD-NEXT: fmax s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s4 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[6] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s3, s5, s3 +; CHECK-NOFP-SD-NEXT: fmax s0, s0, s1 +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s4 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmax v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmax v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; 
CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %a) ret half %b } @@ -249,13 +306,37 @@ } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fmax v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fmax v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fmax v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fmax v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fmax v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fmaxv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmax v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmax v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a) ret float %b } Index: llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | 
FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf declare half @llvm.vector.reduce.fminimum.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a) @@ -30,11 +40,29 @@ } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; 
CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a) ret float %b } @@ -56,166 +84,195 @@ } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s0, s1, s0 +; 
CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s2, s1 +; CHECK-NOFP-SD-NEXT: 
mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a) ret half %b } define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fmin s4, s5, s4 -; CHECK-NOFP-NEXT: 
mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fmin s2, s3, s2 -; CHECK-NOFP-NEXT: mov h3, v1.h[2] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fmin s2, s4, s2 -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fmin s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[4] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fmin s2, s2, s4 -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fmin s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[6] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s3, s5, s3 -; CHECK-NOFP-NEXT: fmin s0, s0, s1 -; CHECK-NOFP-NEXT: fmin s2, s2, s4 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; 
CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fmin s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fmin s2, s3, s2 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fmin s2, s4, s2 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s4 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 
+; CHECK-NOFP-SD-NEXT: fmin s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[6] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s3, s5, s3 +; CHECK-NOFP-SD-NEXT: fmin s0, s0, s1 +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s4 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmin v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmin v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %a) ret half %b } @@ -249,13 +306,37 @@ } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fmin v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fmin 
v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fminv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fmin v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fmin v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fminv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fmin v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fmin v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fminv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmin v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmin v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fminv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %a) ret float %b }