Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -861,6 +861,57 @@
     widenScalarDst(MI, WideTy.getScalarType(), 0);
     return Legalized;
   }
+  case TargetOpcode::G_MERGE_VALUES: {
+    // Only requests for type index 1 are handled, and only when the result is
+    // a scalar. The merge is expanded in the destination type: each source is
+    // zero-extended to DstTy, shifted to its bit offset and OR'd together.
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+
+    unsigned DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (!DstTy.isScalar())
+      return UnableToLegalize;
+
+    // DstReg is only defined by the final G_OR below, so the expansion needs
+    // at least two sources; otherwise MI would be erased with no replacement.
+    unsigned NumOps = MI.getNumOperands();
+    unsigned NumSrc = NumOps - 1;
+    if (NumSrc < 2)
+      return UnableToLegalize;
+
+    unsigned EltSize = DstTy.getSizeInBits() / NumSrc;
+
+    // The first source occupies the low bits and needs no shift.
+    unsigned FirstSrc = MI.getOperand(1).getReg();
+    assert(MRI.getType(FirstSrc) == LLT::scalar(EltSize));
+    unsigned ResultReg = MRI.createGenericVirtualRegister(DstTy);
+    MIRBuilder.buildZExt(ResultReg, FirstSrc);
+
+    unsigned Offset = EltSize;
+    for (unsigned I = 2; I != NumOps; ++I, Offset += EltSize) {
+      unsigned SrcReg = MI.getOperand(I).getReg();
+      assert(MRI.getType(SrcReg) == LLT::scalar(EltSize));
+
+      unsigned ZextInput = MRI.createGenericVirtualRegister(DstTy);
+      MIRBuilder.buildZExt(ZextInput, SrcReg);
+
+      unsigned ShiftAmt = MRI.createGenericVirtualRegister(DstTy);
+      MIRBuilder.buildConstant(ShiftAmt, Offset);
+
+      unsigned Shl = MRI.createGenericVirtualRegister(DstTy);
+      MIRBuilder.buildShl(Shl, ZextInput, ShiftAmt);
+
+      // The last G_OR writes the original destination register directly.
+      unsigned NextResult =
+          I + 1 == NumOps ? DstReg : MRI.createGenericVirtualRegister(DstTy);
+      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+      ResultReg = NextResult;
+    }
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_UADDO:
   case TargetOpcode::G_USUBO: {
     if (TypeIdx == 1)
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -184,7 +184,8 @@ .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, // FIXME: Hack - {S128, S32}, {S128, S64}}) + {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}}) + .scalarize(0); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) @@ -508,6 +509,13 @@ }; getActionDefinitionsBuilder(Op) + .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) + // Clamp the little scalar to s16-s256 and make it a power of 2. It's not + // worth considering the multiples of 64 since 2*192 and 2*384 are not + // valid. + .clampScalar(LitTyIdx, S16, S256) + .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) + // Break up vectors with weird elements into scalars .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, @@ -534,12 +542,6 @@ } return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); }) - .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) - // Clamp the little scalar to s8-s256 and make it a power of 2. It's not - // worth considering the multiples of 64 since 2*192 and 2*384 are not - // valid.
- .clampScalar(LitTyIdx, S16, S256) - .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) .legalIf([=](const LegalityQuery &Query) { const LLT &BigTy = Query.Types[BigTyIdx]; const LLT &LitTy = Query.Types[LitTyIdx]; Index: test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir +++ test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -1,30 +1,37 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=aarch64 -O0 -run-pass=legalizer %s -o - | FileCheck %s ---- | - target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" - target triple = "aarch64--" - define void @test_merge_s4() { - ret void - } -... +# Previously, LegalizerInfo was assuming all G_MERGE_VALUES and G_UNMERGE_VALUES +# instructions are legal. Make sure that is no longer happening. --- name: test_merge_s4 -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } body: | bb.0: - %0(s64) = G_CONSTANT i64 0 - %1(s4) = G_TRUNC %0(s64) - ; Previously, LegalizerInfo was assuming all G_MERGE_VALUES and G_UNMERGE_VALUES - ; instructions are legal. Make sure that is no longer happening. 
- ; CHECK: unable to legalize instruction: {{.*}} G_MERGE_VALUES - %2(s8) = G_MERGE_VALUES %1(s4), %1(s4) - %3(s8) = COPY %2(s8) - %4(s64) = G_ANYEXT %3(s8) - $x0 = COPY %4(s64) + ; CHECK-LABEL: name: test_merge_s4 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[AND1]](s32) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C4]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY2]](s8) + ; CHECK: $x0 = COPY [[ANYEXT]](s64) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s4) = G_TRUNC %0 + + %2:_(s8) = G_MERGE_VALUES %1, %1 + %3:_(s8) = COPY %2 + %4:_(s64) = G_ANYEXT %3 + $x0 = COPY %4 ... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -0,0 +1,156 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: test_merge_s16_s8_s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s16_s8_s8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[AND1]](s32) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CONSTANT i8 1 + %2:_(s16) = G_MERGE_VALUES %0, %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_merge_s24_s8_s8_s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s24_s8_s8_s8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]] + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[AND1]](s32) + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C8]] + ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C7]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C9]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[AND4]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: $vgpr0 = COPY [[COPY8]](s32) + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CONSTANT i8 1 + %2:_(s8) = G_CONSTANT i8 2 + %3:_(s24) = G_MERGE_VALUES %0, %1, %2 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: test_merge_s32_s8_s8_s8_s8 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s32_s8_s8_s8_s8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]] + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C8]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C9]] + ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C10]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: $vgpr0 = COPY [[OR2]](s32) + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CONSTANT i8 1 + %2:_(s8) = G_CONSTANT i8 2 + %3:_(s8) = G_CONSTANT i8 3 + %4:_(s32) = G_MERGE_VALUES %0, %1, %2, %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: test_merge_s64_s32_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: test_merge_s64_s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: $vgpr1_vgpr2 = COPY [[MV]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_MERGE_VALUES %0, %1 + $vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_merge_s64_s16_s16_s16_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-LABEL: name: test_merge_s64_s16_s16_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: $vgpr1_vgpr2 = COPY [[MV]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s16) = G_TRUNC %0 + %5:_(s16) = G_TRUNC %1 + %6:_(s16) = G_TRUNC %2 + %7:_(s16) = G_TRUNC %3 + %8:_(s64) = G_MERGE_VALUES %4, %5, %6, %7 + $vgpr1_vgpr2 = COPY %8 +...