diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -418,7 +418,7 @@
       })
       .clampScalar(0, MinFPScalar, s128);
 
-  getActionDefinitionsBuilder({G_ICMP, G_FCMP})
+  getActionDefinitionsBuilder(G_ICMP)
       .legalFor({{s32, s32},
                  {s32, s64},
                  {s32, p0},
@@ -449,6 +449,43 @@
                             s64)
       .clampNumElements(0, v2s32, v4s32);
 
+  getActionDefinitionsBuilder(G_FCMP)
+      // If we don't have full FP16 support, then scalarize the elements of
+      // vectors containing fp16 types.
+      .fewerElementsIf(
+          [=](const LegalityQuery &Query) {
+            const auto &Ty = Query.Types[0];
+            return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
+          },
+          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
+      // If we don't have full FP16 support, then widen s16 to s32 if we
+      // encounter it.
+      .widenScalarIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0] == s16 && !HasFP16;
+          },
+          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
+      .legalFor({{s16, s16},
+                 {s32, s32},
+                 {s32, s64},
+                 {v4s32, v4s32},
+                 {v2s32, v2s32},
+                 {v2s64, v2s64},
+                 {v4s16, v4s16},
+                 {v8s16, v8s16}})
+      .widenScalarOrEltToNextPow2(1)
+      .clampScalar(1, s32, s64)
+      .clampScalar(0, s32, s32)
+      .minScalarEltSameAsIf(
+          [=](const LegalityQuery &Query) {
+            const LLT &Ty = Query.Types[0];
+            const LLT &SrcTy = Query.Types[1];
+            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
+                   Ty.getElementType() != SrcTy.getElementType();
+          },
+          0, 1)
+      .clampNumElements(0, v2s32, v4s32);
+
   // Extensions
   auto ExtLegalFunc = [=](const LegalityQuery &Query) {
     unsigned DstSize = Query.Types[0].getSizeInBits();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -961,9 +961,10 @@
   const auto Pred =
       static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
   Register LHS = MI.getOperand(2).getReg();
-  // TODO: Handle v4s16 case.
   unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
-  if (EltSize != 32 && EltSize != 64)
+  if (EltSize == 16 && !ST.hasFullFP16())
+    return false;
+  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
     return false;
   Register RHS = MI.getOperand(3).getReg();
   auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
@@ -23,3 +23,99 @@
     $w0 = COPY %5(s32)
 
 ...
+---
+name: legalize_v8s16
+alignment: 4
+legalized: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: legalize_v8s16
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %lhs:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: %rhs:_(<8 x s16>) = COPY $q1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %lhs(<8 x s16>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %rhs(<8 x s16>)
+    ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16)
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP]](s32)
+    ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16)
+    ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP1]](s32)
+    ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+    ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16)
+    ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT4]](s32), [[FPEXT5]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP2]](s32)
+    ; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
+    ; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16)
+    ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT6]](s32), [[FPEXT7]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP3]](s32)
+    ; CHECK-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
+    ; CHECK-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV12]](s16)
+    ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT8]](s32), [[FPEXT9]]
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP4]](s32)
+    ; CHECK-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
+    ; CHECK-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV13]](s16)
+    ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT10]](s32), [[FPEXT11]]
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP5]](s32)
+    ; CHECK-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
+    ; CHECK-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[UV14]](s16)
+    ; CHECK-NEXT: [[FCMP6:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT12]](s32), [[FPEXT13]]
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP6]](s32)
+    ; CHECK-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
+    ; CHECK-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[UV15]](s16)
+    ; CHECK-NEXT: [[FCMP7:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT14]](s32), [[FPEXT15]]
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP7]](s32)
+    ; CHECK-NEXT: %fcmp:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16)
+    ; CHECK-NEXT: $q0 = COPY %fcmp(<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %lhs:_(<8 x s16>) = COPY $q0
+    %rhs:_(<8 x s16>) = COPY $q1
+    %fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
+    $q0 = COPY %fcmp(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: legalize_v4s16
+alignment: 4
+legalized: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: legalize_v4s16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %lhs:_(<4 x s16>) = COPY $d0
+    ; CHECK-NEXT: %rhs:_(<4 x s16>) = COPY $d1
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %lhs(<4 x s16>)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %rhs(<4 x s16>)
+    ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
+    ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP]](s32)
+    ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
+    ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]]
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP1]](s32)
+    ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+    ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
+    ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT4]](s32), [[FPEXT5]]
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP2]](s32)
+    ; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
+    ; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
+    ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT6]](s32), [[FPEXT7]]
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP3]](s32)
+    ; CHECK-NEXT: %fcmp:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; CHECK-NEXT: $d0 = COPY %fcmp(<4 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %lhs:_(<4 x s16>) = COPY $d0
+    %rhs:_(<4 x s16>) = COPY $d1
+    %fcmp:_(<4 x s16>) = G_FCMP floatpred(oeq), %lhs(<4 x s16>), %rhs
+    $d0 = COPY %fcmp(<4 x s16>)
+    RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -324,7 +324,6 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
 ...
 ---
 name: oeq
@@ -702,20 +702,20 @@
 
 ...
 ---
-name: dont_lower_s16
+name: lower_v8s16
 alignment: 4
 legalized: true
 body: |
   bb.0:
     liveins: $q0, $q1
 
-    ; CHECK-LABEL: name: dont_lower_s16
+    ; CHECK-LABEL: name: lower_v8s16
     ; CHECK: liveins: $q0, $q1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %lhs:_(<8 x s16>) = COPY $q0
     ; CHECK-NEXT: %rhs:_(<8 x s16>) = COPY $q1
-    ; CHECK-NEXT: %fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
-    ; CHECK-NEXT: $q0 = COPY %fcmp(<8 x s16>)
+    ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<8 x s16>) = G_FCMEQ %lhs, %rhs(<8 x s16>)
+    ; CHECK-NEXT: $q0 = COPY [[FCMEQ]](<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %lhs:_(<8 x s16>) = COPY $q0
     %rhs:_(<8 x s16>) = COPY $q1
@@ -723,6 +723,29 @@
     $q0 = COPY %fcmp(<8 x s16>)
     RET_ReallyLR implicit $q0
 
+...
+---
+name: lower_v4s16
+alignment: 4
+legalized: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: lower_v4s16
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %lhs:_(<4 x s16>) = COPY $d0
+    ; CHECK-NEXT: %rhs:_(<4 x s16>) = COPY $d1
+    ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<4 x s16>) = G_FCMEQ %lhs, %rhs(<4 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[FCMEQ]](<4 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %lhs:_(<4 x s16>) = COPY $d0
+    %rhs:_(<4 x s16>) = COPY $d1
+    %fcmp:_(<4 x s16>) = G_FCMP floatpred(oeq), %lhs(<4 x s16>), %rhs
+    $d0 = COPY %fcmp(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
 ...
 ---
 name: is_not_nan
diff --git a/llvm/test/CodeGen/AArch64/vacg.ll b/llvm/test/CodeGen/AArch64/vacg.ll
--- a/llvm/test/CodeGen/AArch64/vacg.ll
+++ b/llvm/test/CodeGen/AArch64/vacg.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel | FileCheck %s
 
 define <4 x i32> @gt_v4f32(<4 x float> %a, <4 x float> %b) {
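
For reference, a minimal IR-level sketch (hypothetical; not part of this patch) of the case the change enables. Under GlobalISel with -mattr=+fullfp16, the fcmp below should hit the new {v8s16, v8s16} G_FCMP legality rule and lower through G_FCMEQ to a single fcmeq.8h; without +fullfp16, the legalizer instead scalarizes each lane through s32, as in the legalize_v8s16 test above. The function name and RUN line are illustrative only:

; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel < %s | FileCheck %s
define <8 x i16> @oeq_v8f16(<8 x half> %a, <8 x half> %b) {
  ; Lane-wise ordered-equal compare; the sext turns the <8 x i1> result into
  ; the usual all-ones/all-zeros NEON mask.
  %cmp = fcmp oeq <8 x half> %a, %b
  %sext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %sext
}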