Index: include/llvm/Target/GenericOpcodes.td =================================================================== --- include/llvm/Target/GenericOpcodes.td +++ include/llvm/Target/GenericOpcodes.td @@ -499,8 +499,8 @@ // indexes. This will almost certainly be mapped to sub-register COPYs after // register banks have been selected. def G_UNMERGE_VALUES : Instruction { - let OutOperandList = (outs); - let InOperandList = (ins variable_ops); + let OutOperandList = (outs type0:$dst0, variable_ops); + let InOperandList = (ins type1:$src); let hasSideEffects = 0; } @@ -514,7 +514,7 @@ /// Concatenante multiple registers of the same size into a wider register. def G_MERGE_VALUES : Instruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins variable_ops); + let InOperandList = (ins type1:$src0, variable_ops); let hasSideEffects = 0; } Index: lib/CodeGen/GlobalISel/LegalizerInfo.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -167,13 +167,6 @@ assert(TablesInitialized && "backend forgot to call computeTables"); // These *have* to be implemented for now, they're the fundamental basis of // how everything else is transformed. - - // FIXME: the long-term plan calls for expansion in terms of load/store (if - // they're not legal). 
- if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || - Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) - return std::make_pair(Legal, Aspect.Type); - if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) return findScalarLegalAction(Aspect); assert(Aspect.Type.isVector()); @@ -198,7 +191,17 @@ SeenTypes.set(TypeIdx); - LLT Ty = MRI.getType(MI.getOperand(i).getReg()); + unsigned Op = MI.getOperand(i).getReg(); + // G_MERGE_VALUES and G_UNMERGE_VALUES have variable number of operands, + // but there is only one source type and one destination type as all sources + // for G_MERGE_VALUES and all destination for G_UNMERGE_VALUES must be the + // same type. So, get the last operand if TypeIdx == 1. + if ((MI.getOpcode() == TargetOpcode::G_MERGE_VALUES || + MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && + TypeIdx == 1) { + Op = MI.getOperand(MI.getNumOperands() - 1).getReg(); + } + LLT Ty = MRI.getType(Op); auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); if (Action.first != Legal) return std::make_tuple(Action.first, TypeIdx, Action.second); Index: lib/Target/AArch64/AArch64LegalizerInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -135,10 +135,15 @@ const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT s96 = LLT::scalar(96); const LLT s128 = LLT::scalar(128); + const LLT s192 = LLT::scalar(192); const LLT v2s32 = LLT::vector(2, 32); + const LLT v3s32 = LLT::vector(3, 32); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); + const LLT v4s64 = LLT::vector(4, 64); + const LLT v6s64 = LLT::vector(6, 64); for (auto Ty : {p0, s1, s8, s16, s32, s64}) setAction({G_IMPLICIT_DEF, Ty}, Legal); @@ -349,6 +354,17 @@ for (auto Ty : {s8, s16, s32, s64, p0}) setAction({G_VAARG, Ty}, Custom); + // Merge/Unmerge + for (const auto &Ty : + {s32, s64, s96, s128, s192, 
v2s32, v3s32, v4s32, v2s64, v4s64, v6s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + for (const auto &Ty : {s16, s32, s64, v2s32, v4s32, v2s64}) { + setAction({G_MERGE_VALUES, 1, Ty}, Legal); + setAction({G_UNMERGE_VALUES, Ty}, Legal); + } + computeTables(); } Index: lib/Target/ARM/ARMLegalizerInfo.cpp =================================================================== --- lib/Target/ARM/ARMLegalizerInfo.cpp +++ lib/Target/ARM/ARMLegalizerInfo.cpp @@ -177,6 +177,15 @@ for (auto Ty : {s32, s64}) setAction({Op, Ty}, Libcall); + // Merge/Unmerge + for (const auto &Ty : {s32, s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + for (const auto &Ty : {s16, s32}) { + setAction({G_MERGE_VALUES, 1, Ty}, Legal); + setAction({G_UNMERGE_VALUES, Ty}, Legal); + } computeTables(); } Index: lib/Target/X86/X86LegalizerInfo.cpp =================================================================== --- lib/Target/X86/X86LegalizerInfo.cpp +++ lib/Target/X86/X86LegalizerInfo.cpp @@ -90,6 +90,7 @@ const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); for (auto Ty : {p0, s1, s8, s16, s32}) setAction({G_IMPLICIT_DEF, Ty}, Legal); @@ -140,6 +141,16 @@ for (auto Ty : {s8, s16, s32, p0}) setAction({G_ICMP, 1, Ty}, Legal); + + // Merge/Unmerge + for (const auto &Ty : {s16, s32, s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + for (const auto &Ty : {s8, s16, s32}) { + setAction({G_MERGE_VALUES, 1, Ty}, Legal); + setAction({G_UNMERGE_VALUES, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfo64bit() { @@ -148,6 +159,7 @@ return; const LLT s64 = LLT::scalar(64); + const LLT s128 = LLT::scalar(128); setAction({G_IMPLICIT_DEF, s64}, Legal); @@ -172,6 +184,12 @@ // Comparison setAction({G_ICMP, 1, s64}, Legal); + + // Merge/Unmerge + setAction({G_MERGE_VALUES, 
s128}, Legal); + setAction({G_UNMERGE_VALUES, 1, s128}, Legal); + setAction({G_MERGE_VALUES, 1, s128}, Legal); + setAction({G_UNMERGE_VALUES, s128}, Legal); } void X86LegalizerInfo::setLegalizerInfoSSE1() { @@ -179,6 +197,7 @@ return; const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); @@ -192,6 +211,14 @@ // Constants setAction({TargetOpcode::G_FCONSTANT, s32}, Legal); + + // Merge/Unmerge + for (const auto &Ty : {v4s32, v2s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + setAction({G_MERGE_VALUES, 1, s64}, Legal); + setAction({G_UNMERGE_VALUES, s64}, Legal); } void X86LegalizerInfo::setLegalizerInfoSSE2() { @@ -205,6 +232,11 @@ const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); + const LLT v32s8 = LLT::vector(32, 8); + const LLT v16s16 = LLT::vector(16, 16); + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) for (auto Ty : {s64, v2s64}) setAction({BinOp, Ty}, Legal); @@ -220,6 +252,17 @@ // Constants setAction({TargetOpcode::G_FCONSTANT, s64}, Legal); + + // Merge/Unmerge + for (const auto &Ty : + {v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + for (const auto &Ty : {v16s8, v8s16, v4s32, v2s64}) { + setAction({G_MERGE_VALUES, 1, Ty}, Legal); + setAction({G_UNMERGE_VALUES, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoSSE41() { @@ -241,9 +284,13 @@ const LLT v2s64 = LLT::vector(2, 64); const LLT v32s8 = LLT::vector(32, 8); + const LLT v64s8 = LLT::vector(64, 8); const LLT v16s16 = LLT::vector(16, 16); + const LLT v32s16 = LLT::vector(32, 16); const LLT v8s32 = LLT::vector(8, 32); + const LLT v16s32 = LLT::vector(16, 32); const LLT v4s64 = LLT::vector(4, 64); + const LLT v8s64 = LLT::vector(8, 
64); for (unsigned MemOp : {G_LOAD, G_STORE}) for (auto Ty : {v8s32, v4s64}) @@ -257,6 +304,17 @@ setAction({G_INSERT, 1, Ty}, Legal); setAction({G_EXTRACT, Ty}, Legal); } + // Merge/Unmerge + for (const auto &Ty : + {v32s8, v64s8, v16s16, v32s16, v8s32, v16s32, v4s64, v8s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + for (const auto &Ty : + {v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) { + setAction({G_MERGE_VALUES, 1, Ty}, Legal); + setAction({G_UNMERGE_VALUES, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoAVX2() { @@ -268,12 +326,27 @@ const LLT v8s32 = LLT::vector(8, 32); const LLT v4s64 = LLT::vector(4, 64); + const LLT v64s8 = LLT::vector(64, 8); + const LLT v32s16 = LLT::vector(32, 16); + const LLT v16s32 = LLT::vector(16, 32); + const LLT v8s64 = LLT::vector(8, 64); + for (unsigned BinOp : {G_ADD, G_SUB}) for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) setAction({BinOp, Ty}, Legal); for (auto Ty : {v16s16, v8s32}) setAction({G_MUL, Ty}, Legal); + + // Merge/Unmerge + for (const auto &Ty : {v64s8, v32s16, v16s32, v8s64}) { + setAction({G_MERGE_VALUES, Ty}, Legal); + setAction({G_UNMERGE_VALUES, 1, Ty}, Legal); + } + for (const auto &Ty : {v32s8, v16s16, v8s32, v4s64}) { + setAction({G_MERGE_VALUES, 1, Ty}, Legal); + setAction({G_UNMERGE_VALUES, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoAVX512() { Index: test/CodeGen/AArch64/GlobalISel/legalize-combines.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalize-combines.mir +++ test/CodeGen/AArch64/GlobalISel/legalize-combines.mir @@ -8,7 +8,6 @@ define void @test_combines_3() { ret void } define void @test_combines_4() { ret void } define void @test_combines_5() { ret void } - define void @test_combines_6() { ret void } ... --- @@ -84,22 +83,3 @@ %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2 %5:_(s32) = G_ADD %3, %4 ... 
- ---- -name: test_combines_6 -body: | - bb.0: - liveins: %w0 - - ; Check that we replace all the uses of a G_EXTRACT. - ; CHECK-LABEL: name: test_combines_6 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 - ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[MUL]] - %0:_(s32) = COPY %w0 - - %1:_(s32) = G_MERGE_VALUES %0 - %2:_(s32) = G_UNMERGE_VALUES %1 - %3:_(s32) = G_MUL %2, %2 - %4:_(s32) = G_ADD %2, %3 -... Index: test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -0,0 +1,28 @@ +# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_merge_v2s4() { + ret void + } +... + +--- +name: test_merge_v2s4 +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +body: | + bb.0: + %0(s64) = G_CONSTANT i64 0 + %1(s4) = G_TRUNC %0(s64) + ; CHECK: unable to legalize instruction: {{.*}} G_MERGE_VALUES + %2(<2 x s4>) = G_MERGE_VALUES %1(s4), %1(s4) + %3(s8) = G_BITCAST %2(<2 x s4>) + %4(s64) = G_ANYEXT %3(s8) + %x0 = COPY %4(s64) +... Index: test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir @@ -0,0 +1,29 @@ +# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_unmerge_v2s4() { + ret void + } +... 
+ +--- +name: test_unmerge_v2s4 +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + %0(s16) = G_CONSTANT i16 0 + ; CHECK: unable to legalize instruction: {{.*}} G_UNMERGE_VALUES + %1(<2 x s4>), %2(<2 x s4>)= G_UNMERGE_VALUES %0(s16) + %3(s8) = G_BITCAST %1(<2 x s4>) + %5(s64) = G_ANYEXT %3(s8) + %x0 = COPY %5(s64) + +... Index: test/CodeGen/AArch64/GlobalISel/no-regclass.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/no-regclass.mir +++ test/CodeGen/AArch64/GlobalISel/no-regclass.mir @@ -25,7 +25,7 @@ ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 ; CHECK: %w0 = COPY [[COPY]] %0:gpr(s32) = COPY %w0 - %1:gpr(s32) = G_MERGE_VALUES %0(s32) - %2:gpr(s32) = G_UNMERGE_VALUES %1(s32) + %1:gpr(s64) = G_MERGE_VALUES %0(s32), %0(s32) + %2:gpr(s32), %3:gpr(s32) = G_UNMERGE_VALUES %1(s64) %w0 = COPY %2(s32) ...