diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -162,16 +162,18 @@ Register widenWithUnmerge(LLT WideTy, Register OrigReg); private: - LegalizeResult - widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); - LegalizeResult - widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); - LegalizeResult - widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); - LegalizeResult - widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); - LegalizeResult - widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); + LegalizeResult widenScalarConcatVectors(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); + LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); + LegalizeResult widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); + LegalizeResult widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); + LegalizeResult widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); + LegalizeResult widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1373,6 +1373,21 @@ MO.setReg(CastDst); } +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarConcatVectors(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) + widenScalarSrc(MI, WideTy, SrcIdx, TargetOpcode::G_ANYEXT); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + widenScalarDst(MI, DstTy.changeElementType(WideTy.getElementType())); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1772,6 +1787,8 @@ switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_CONCAT_VECTORS: + return widenScalarConcatVectors(MI, TypeIdx, WideTy); case TargetOpcode::G_EXTRACT: return widenScalarExtract(MI, TypeIdx, WideTy); case TargetOpcode::G_INSERT: @@ -3921,7 +3938,13 @@ switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); - case G_TRUNC: + case G_TRUNC: { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT NewNarrowTy = NarrowTy; + if (NarrowTy.isVector()) + NewNarrowTy = LLT::vector(NarrowTy.getNumElements(), DstTy.getElementType()); + return reduceOperationWidth(MI, 0, NewNarrowTy); + } case G_AND: case G_OR: case G_XOR: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -349,7 +349,8 @@ .minScalarOrEltIf( [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, s64) - 
.widenScalarOrEltToNextPow2(1); + .widenScalarOrEltToNextPow2(1) + .clampNumElements(0, v2s32, v4s32); getActionDefinitionsBuilder(G_FCMP) .legalFor({{s32, s32}, {s32, s64}}) @@ -388,7 +389,11 @@ .legalIf(ExtLegalFunc) .clampScalar(0, s64, s64); // Just for s128, others are handled above. - getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); + getActionDefinitionsBuilder(G_TRUNC) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, + 0, s8) + .alwaysLegal(); getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); @@ -650,7 +655,9 @@ .clampNumElements(0, v2s64, v2s64); getActionDefinitionsBuilder(G_CONCAT_VECTORS) - .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); + .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v8s8, v4s8}}) + .clampNumElements(0, v4s32, v4s32) + .minScalarOrElt(1, s8); getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir @@ -1920,3 +1920,40 @@ RET_ReallyLR implicit $d0 ... 
+--- +name: icmp_2xs32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } + - { reg: '$q2' } + - { reg: '$q3' } +body: | + bb.1: + liveins: $q0, $q1, $q2, $q3 + + ; CHECK-LABEL: name: icmp_2xs32 + ; CHECK: liveins: $q0, $q1, $q2, $q3 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[ICMP]](<4 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[ICMP1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) + ; CHECK: $d0 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %2:_(<4 x s32>) = COPY $q0 + %3:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>) + %4:_(<4 x s32>) = COPY $q2 + %5:_(<4 x s32>) = COPY $q3 + %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>) + %6:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s32>), %1 + %7:_(<8 x s8>) = G_ANYEXT %6(<8 x s1>) + $d0 = COPY %7(<8 x s8>) + RET_ReallyLR implicit $d0 +...