diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -999,7 +999,8 @@ truncstore_merge, div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, - and_or_disjoint_mask, fma_combines, fold_binop_into_select]>; + and_or_disjoint_mask, fma_combines, fold_binop_into_select, + select_to_minmax]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5885,7 +5885,6 @@ LLT DstTy = MRI.getType(Dst); // Bail out early on pointers, since we'll never want to fold to a min/max. - // TODO: Handle vectors. - if (DstTy.isPointer() || DstTy.isVector()) + if (DstTy.isPointer()) return false; // Match a floating point compare with a less-than/greater-than predicate. // TODO: Allow multiple users of the compare if they are all selects. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -803,10 +803,16 @@ .libcallFor({s128}) .minScalar(0, MinFPScalar); - // TODO: Vector types. 
getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM}) - .legalFor({MinFPScalar, s32, s64}) - .minScalar(0, MinFPScalar); + .legalFor({MinFPScalar, s32, s64, v4s32, v2s64}) + .legalIf([=](const LegalityQuery &Query) { + const auto &Ty = Query.Types[0]; + return Ty == v8s16 && HasFP16; + }) + .minScalar(0, MinFPScalar) + .clampNumElements(0, v8s16, v8s16) + .clampNumElements(0, v4s32, v4s32) + .clampNumElements(0, v2s64, v2s64); // TODO: Libcall support for s128. // TODO: s16 should be legal with full FP16 support. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fmaximum.mir @@ -9,13 +9,17 @@ bb.0: liveins: $h0, $h1 ; FP16-LABEL: name: s16_legal_with_full_fp16 - ; FP16: %a:_(s16) = COPY $h0 + ; FP16: liveins: $h0, $h1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(s16) = COPY $h0 ; FP16-NEXT: %b:_(s16) = COPY $h1 ; FP16-NEXT: %legalize_me:_(s16) = G_FMAXIMUM %a, %b ; FP16-NEXT: $h0 = COPY %legalize_me(s16) ; FP16-NEXT: RET_ReallyLR implicit $h0 ; NO-FP16-LABEL: name: s16_legal_with_full_fp16 - ; NO-FP16: %a:_(s16) = COPY $h0 + ; NO-FP16: liveins: $h0, $h1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(s16) = COPY $h0 ; NO-FP16-NEXT: %b:_(s16) = COPY $h1 ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16) ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16) @@ -37,13 +41,17 @@ bb.0: liveins: $s0, $s1 ; FP16-LABEL: name: s32_legal - ; FP16: %a:_(s32) = COPY $s0 + ; FP16: liveins: $s0, $s1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(s32) = COPY $s0 ; FP16-NEXT: %b:_(s32) = COPY $s1 ; FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b ; FP16-NEXT: $s0 = COPY %legalize_me(s32) ; FP16-NEXT: RET_ReallyLR implicit $s0 ; NO-FP16-LABEL: name: s32_legal - ; NO-FP16: %a:_(s32) = COPY $s0 + ; NO-FP16: liveins: $s0, $s1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(s32) = 
COPY $s0 ; NO-FP16-NEXT: %b:_(s32) = COPY $s1 ; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMAXIMUM %a, %b ; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32) @@ -62,13 +70,17 @@ bb.0: liveins: $d0, $d1 ; FP16-LABEL: name: s64_legal - ; FP16: %a:_(s64) = COPY $d0 + ; FP16: liveins: $d0, $d1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(s64) = COPY $d0 ; FP16-NEXT: %b:_(s64) = COPY $d1 ; FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b ; FP16-NEXT: $d0 = COPY %legalize_me(s64) ; FP16-NEXT: RET_ReallyLR implicit $d0 ; NO-FP16-LABEL: name: s64_legal - ; NO-FP16: %a:_(s64) = COPY $d0 + ; NO-FP16: liveins: $d0, $d1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(s64) = COPY $d0 ; NO-FP16-NEXT: %b:_(s64) = COPY $d1 ; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMAXIMUM %a, %b ; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64) @@ -78,3 +90,33 @@ %legalize_me:_(s64) = G_FMAXIMUM %a, %b $d0 = COPY %legalize_me(s64) RET_ReallyLR implicit $d0 +... +--- +name: v4s32 +alignment: 4 +body: | + bb.0: + liveins: $q0, $q1 + ; FP16-LABEL: name: v4s32 + ; FP16: liveins: $q0, $q1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0 + ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1 + ; FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b + ; FP16-NEXT: $q0 = COPY %maximum(<4 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + ; NO-FP16-LABEL: name: v4s32 + ; NO-FP16: liveins: $q0, $q1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0 + ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1 + ; NO-FP16-NEXT: %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b + ; NO-FP16-NEXT: $q0 = COPY %maximum(<4 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + %a:_(<4 x s32>) = COPY $q0 + %b:_(<4 x s32>) = COPY $q1 + %maximum:_(<4 x s32>) = G_FMAXIMUM %a, %b + $q0 = COPY %maximum(<4 x s32>) + RET_ReallyLR implicit $q0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminimum.mir @@ -9,13 +9,17 @@ bb.0: liveins: $h0, $h1 ; FP16-LABEL: name: s16_legal_with_full_fp16 - ; FP16: %a:_(s16) = COPY $h0 + ; FP16: liveins: $h0, $h1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(s16) = COPY $h0 ; FP16-NEXT: %b:_(s16) = COPY $h1 ; FP16-NEXT: %legalize_me:_(s16) = G_FMINIMUM %a, %b ; FP16-NEXT: $h0 = COPY %legalize_me(s16) ; FP16-NEXT: RET_ReallyLR implicit $h0 ; NO-FP16-LABEL: name: s16_legal_with_full_fp16 - ; NO-FP16: %a:_(s16) = COPY $h0 + ; NO-FP16: liveins: $h0, $h1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(s16) = COPY $h0 ; NO-FP16-NEXT: %b:_(s16) = COPY $h1 ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16) ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16) @@ -37,13 +41,17 @@ bb.0: liveins: $s0, $s1 ; FP16-LABEL: name: s32_legal - ; FP16: %a:_(s32) = COPY $s0 + ; FP16: liveins: $s0, $s1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(s32) = COPY $s0 ; FP16-NEXT: %b:_(s32) = COPY $s1 ; FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b ; FP16-NEXT: $s0 = COPY %legalize_me(s32) ; FP16-NEXT: RET_ReallyLR implicit $s0 ; NO-FP16-LABEL: name: s32_legal - ; NO-FP16: %a:_(s32) = COPY $s0 + ; NO-FP16: liveins: $s0, $s1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(s32) = COPY $s0 ; NO-FP16-NEXT: %b:_(s32) = COPY $s1 ; NO-FP16-NEXT: %legalize_me:_(s32) = G_FMINIMUM %a, %b ; NO-FP16-NEXT: $s0 = COPY %legalize_me(s32) @@ -62,13 +70,17 @@ bb.0: liveins: $d0, $d1 ; FP16-LABEL: name: s64_legal - ; FP16: %a:_(s64) = COPY $d0 + ; FP16: liveins: $d0, $d1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(s64) = COPY $d0 ; FP16-NEXT: %b:_(s64) = COPY $d1 ; FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b ; FP16-NEXT: $d0 = COPY %legalize_me(s64) ; FP16-NEXT: RET_ReallyLR implicit $d0 ; 
NO-FP16-LABEL: name: s64_legal - ; NO-FP16: %a:_(s64) = COPY $d0 + ; NO-FP16: liveins: $d0, $d1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(s64) = COPY $d0 ; NO-FP16-NEXT: %b:_(s64) = COPY $d1 ; NO-FP16-NEXT: %legalize_me:_(s64) = G_FMINIMUM %a, %b ; NO-FP16-NEXT: $d0 = COPY %legalize_me(s64) @@ -78,3 +90,77 @@ %legalize_me:_(s64) = G_FMINIMUM %a, %b $d0 = COPY %legalize_me(s64) RET_ReallyLR implicit $d0 +... +--- +name: v4s32 +alignment: 4 +body: | + bb.0: + liveins: $q0, $q1 + ; FP16-LABEL: name: v4s32 + ; FP16: liveins: $q0, $q1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0 + ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1 + ; FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b + ; FP16-NEXT: $q0 = COPY %minimum(<4 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + ; NO-FP16-LABEL: name: v4s32 + ; NO-FP16: liveins: $q0, $q1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0 + ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1 + ; NO-FP16-NEXT: %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b + ; NO-FP16-NEXT: $q0 = COPY %minimum(<4 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + %a:_(<4 x s32>) = COPY $q0 + %b:_(<4 x s32>) = COPY $q1 + %minimum:_(<4 x s32>) = G_FMINIMUM %a, %b + $q0 = COPY %minimum(<4 x s32>) + RET_ReallyLR implicit $q0 + +... 
+ +--- +name: v8s32 +alignment: 4 +body: | + bb.0: + liveins: $q0, $q1, $q2, $q3 + ; FP16-LABEL: name: v8s32 + ; FP16: liveins: $q0, $q1, $q2, $q3 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %a:_(<4 x s32>) = COPY $q0 + ; FP16-NEXT: %b:_(<4 x s32>) = COPY $q1 + ; FP16-NEXT: %c:_(<4 x s32>) = COPY $q2 + ; FP16-NEXT: %d:_(<4 x s32>) = COPY $q3 + ; FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c + ; FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d + ; FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>) + ; FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + ; NO-FP16-LABEL: name: v8s32 + ; NO-FP16: liveins: $q0, $q1, $q2, $q3 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %a:_(<4 x s32>) = COPY $q0 + ; NO-FP16-NEXT: %b:_(<4 x s32>) = COPY $q1 + ; NO-FP16-NEXT: %c:_(<4 x s32>) = COPY $q2 + ; NO-FP16-NEXT: %d:_(<4 x s32>) = COPY $q3 + ; NO-FP16-NEXT: [[FMINIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %a, %c + ; NO-FP16-NEXT: [[FMINIMUM1:%[0-9]+]]:_(<4 x s32>) = G_FMINIMUM %b, %d + ; NO-FP16-NEXT: $q0 = COPY [[FMINIMUM]](<4 x s32>) + ; NO-FP16-NEXT: $q1 = COPY [[FMINIMUM1]](<4 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + %a:_(<4 x s32>) = COPY $q0 + %b:_(<4 x s32>) = COPY $q1 + %c:_(<4 x s32>) = COPY $q2 + %d:_(<4 x s32>) = COPY $q3 + %v1:_(<8 x s32>) = G_CONCAT_VECTORS %a, %b + %v2:_(<8 x s32>) = G_CONCAT_VECTORS %c, %d + %minimum:_(<8 x s32>) = G_FMINIMUM %v1, %v2 + %uv1:_(<4 x s32>), %uv2:_(<4 x s32>) = G_UNMERGE_VALUES %minimum + $q0 = COPY %uv1(<4 x s32>) + $q1 = COPY %uv2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -510,11 +510,11 @@ # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. the first uncovered type index: 1, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index -# DEBUG-NEXT: .. the first uncovered type index: 1, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir @@ -0,0 +1,188 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -mattr=+fullfp16 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s +--- +name: test_s16 +body: | + bb.0: + liveins: $h0 + + ; CHECK-LABEL: name: test_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: $h0 = COPY [[FMAXIMUM]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 + %0:_(s16) = COPY $h0 + %1:_(s16) = G_FCONSTANT half 0xH0000 + %2:_(s1) = 
G_FCMP floatpred(olt), %0(s16), %1 + %3:_(s16) = G_SELECT %2(s1), %1, %0 + $h0 = COPY %3(s16) + RET_ReallyLR implicit $h0 + +... +--- +name: test_s32 +body: | + bb.0: + liveins: $s0 + + ; CHECK-LABEL: name: test_s32 + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: $s0 = COPY [[FMAXIMUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 + %0:_(s32) = COPY $s0 + %1:_(s32) = G_FCONSTANT float 0.000000e+00 + %2:_(s1) = G_FCMP floatpred(olt), %0(s32), %1 + %3:_(s32) = G_SELECT %2(s1), %1, %0 + $s0 = COPY %3(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: test_s64 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_s64 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: $d0 = COPY [[FMAXIMUM]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(s64) = COPY $d0 + %1:_(s64) = G_FCONSTANT double 0.000000e+00 + %2:_(s1) = G_FCMP floatpred(olt), %0(s64), %1 + %3:_(s64) = G_SELECT %2(s1), %1, %0 + $d0 = COPY %3(s64) + RET_ReallyLR implicit $d0 + +... 
+--- +name: test_s64_fmin +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_s64_fmin + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[C]], [[COPY]] + ; CHECK-NEXT: $d0 = COPY [[FMINIMUM]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(s64) = COPY $d0 + %1:_(s64) = G_FCONSTANT double 0.000000e+00 + %2:_(s1) = G_FCMP floatpred(ogt), %0(s64), %1 + %3:_(s64) = G_SELECT %2(s1), %1, %0 + $d0 = COPY %3(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v8s16 +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v8s16 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<8 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %0:_(<8 x s16>) = COPY $q0 + %2:_(s16) = G_FCONSTANT half 0xH0000 + %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16) + %3:_(<8 x s1>) = G_FCMP floatpred(olt), %0(<8 x s16>), %1 + %4:_(<8 x s16>) = G_SELECT %3(<8 x s1>), %1, %0 + $q0 = COPY %4(<8 x s16>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: test_v4s32 +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v4s32 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BUILD_VECTOR]], [[BITCAST]] + ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %1:_(<2 x s64>) = COPY $q0 + %0:_(<4 x s32>) = G_BITCAST %1(<2 x s64>) + %3:_(s32) = G_FCONSTANT float 0.000000e+00 + %2:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32), %3(s32), %3(s32) + %4:_(<4 x s1>) = G_FCMP floatpred(olt), %0(<4 x s32>), %2 + %5:_(<4 x s32>) = G_SELECT %4(<4 x s1>), %2, %0 + $q0 = COPY %5(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2s64 +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v2s64 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %2:_(s64) = G_FCONSTANT double 0.000000e+00 + %1:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64) + %3:_(<2 x s1>) = G_FCMP floatpred(olt), %0(<2 x s64>), %1 + %4:_(<2 x s64>) = G_SELECT %3(<2 x s1>), %1, %0 + $q0 = COPY %4(<2 x s64>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: test_v2s64_fmin +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v2s64_fmin + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) + ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FMINIMUM]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %2:_(s64) = G_FCONSTANT double 0.000000e+00 + %1:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64) + %3:_(<2 x s1>) = G_FCMP floatpred(ogt), %0(<2 x s64>), %1 + %4:_(<2 x s64>) = G_SELECT %3(<2 x s1>), %1, %0 + $q0 = COPY %4(<2 x s64>) + RET_ReallyLR implicit $q0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s +define half @test_s16(half %a) #0 { +; CHECK-LABEL: test_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: fmax h0, h1, h0 +; CHECK-NEXT: ret +entry: + %fcmp = fcmp olt half %a, 0.0 + %sel = select i1 %fcmp, half 0.0, half %a + ret half %sel +} + +define float @test_s32(float %a) #0 { +; CHECK-LABEL: test_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: fmax s0, s1, s0 +; CHECK-NEXT: ret +entry: + %fcmp = fcmp olt float %a, 0.0 + %sel = select i1 %fcmp, float 0.0, float %a + ret float %sel +} + +define double @test_s64(double %a) #0 { +; CHECK-LABEL: test_s64: +; CHECK: // %bb.0: // %entry +; 
CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: fmax d0, d1, d0 +; CHECK-NEXT: ret +entry: + %fcmp = fcmp olt double %a, 0.0 + %sel = select i1 %fcmp, double 0.0, double %a + ret double %sel +} + +define <8 x half> @test_v8s16(<8 x half> %a) #0 { +; CHECK-LABEL: test_v8s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: dup v1.8h, v1.h[0] +; CHECK-NEXT: fmax v0.8h, v1.8h, v0.8h +; CHECK-NEXT: ret +entry: + %fcmp = fcmp olt <8 x half> %a, zeroinitializer + %sel = select <8 x i1> %fcmp, <8 x half> zeroinitializer, <8 x half> %a + ret <8 x half> %sel +} + +define <4 x float> @test_v4s32(<4 x float> %a) #0 { +; CHECK-LABEL: test_v4s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: dup v1.4s, v1.s[0] +; CHECK-NEXT: fmax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret +entry: + %fcmp = fcmp olt <4 x float> %a, zeroinitializer + %sel = select <4 x i1> %fcmp, <4 x float> zeroinitializer, <4 x float> %a + ret <4 x float> %sel +} + +define <2 x double> @test_v2s64(<2 x double> %a) #0 { +; CHECK-LABEL: test_v2s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: dup v1.2d, v1.d[0] +; CHECK-NEXT: fmax v0.2d, v1.2d, v0.2d +; CHECK-NEXT: ret +entry: + %fcmp = fcmp olt <2 x double> %a, zeroinitializer + %sel = select <2 x i1> %fcmp, <2 x double> zeroinitializer, <2 x double> %a + ret <2 x double> %sel +} +