diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -269,7 +269,7 @@ if (Opcode == TargetOpcode::G_ANYEXT) { // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF - if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) + if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) return false; LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {}); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -741,19 +741,34 @@ default: return UnableToLegalize; case TargetOpcode::G_IMPLICIT_DEF: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + // If SizeOp0 is not an exact multiple of NarrowSize, emit + // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed. + // FIXME: Although this would also be legal for the general case, it causes + // a lot of regressions in the emitted code (superfluous COPYs, artifact + // combines not being hit). This seems to be a problem related to the + // artifact combiner. + if (SizeOp0 % NarrowSize != 0) { + LLT ImplicitTy = NarrowTy; + if (DstTy.isVector()) + ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy); + + Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0); + MIRBuilder.buildAnyExt(DstReg, ImplicitReg); + + MI.eraseFromParent(); + return Legalized; + } + int NumParts = SizeOp0 / NarrowSize; SmallVector DstRegs; for (int i = 0; i < NumParts; ++i) - DstRegs.push_back( - MIRBuilder.buildUndef(NarrowTy).getReg(0)); + DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0)); - Register DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) + if (DstTy.isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else MIRBuilder.buildMerge(DstReg, DstRegs); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir @@ -0,0 +1,17 @@ +# RUN: not --crash llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - 2>&1 | FileCheck %s +# RUN: not --crash llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - 2>&1 | FileCheck %s + +# This test is not legalizable and produces more than 9000 instructions for it +# before it fails, including legalizer artifacts like +# %foo:_(s65600) = G_MERGE_VALUES. Since it is not really feasible to have that +# many CHECK-lines, only the presence of the error is checked. + +--- +name: test_implicit_def_s1025 +body: | + bb.0: + ; CHECK: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(s65600) = G_MERGE_VALUES + %0:_(s1025) = G_IMPLICIT_DEF + %1:_(s32) = G_TRUNC %0 + $vgpr0 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -175,28 +175,34 @@ ... --- -name: test_implicit_def_s1025 -body: | - bb.0: - - ; CHECK-LABEL: name: test_implicit_def_s1025 - ; CHECK: [[DEF:%[0-9]+]]:_(s1025) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1025) - ; CHECK: $vgpr0 = COPY [[TRUNC]](s32) - %0:_(s1025) = G_IMPLICIT_DEF - %1:_(s32) = G_TRUNC %0 - $vgpr0 = COPY %1 -... - ---- name: test_implicit_def_s1056 body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s1056 - ; CHECK: [[DEF:%[0-9]+]]:_(s1056) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1056) - ; CHECK: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV9]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV10]](s32), [[UV11]](s32) + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV13]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV14]](s32), [[UV15]](s32) + ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s32), [[UV17]](s32) + ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV18]](s32), [[UV19]](s32) + ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV20]](s32), [[UV21]](s32) + ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV22]](s32), [[UV23]](s32) + ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV25]](s32) + ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV26]](s32), [[UV27]](s32) + ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV28]](s32), [[UV29]](s32) + ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV30]](s32), [[UV31]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV16:%[0-9]+]]:_(s2112) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV16]](s2112) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s1056) + ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32) %0:_(s1056) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -2748,4 +2748,60 @@ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } +// Test narrowing of G_IMPLICIT_DEF +TEST_F(AArch64GISelMITest, NarrowImplicitDef) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + // Make sure that G_IMPLICIT_DEF can be narrowed if the original size is not a + // multiple of narrow size + LLT S32{LLT::scalar(32)}; + LLT S48{LLT::scalar(48)}; + LLT S64{LLT::scalar(64)}; + LLT V2S64{{LLT::vector(2, 64)}}; + + auto Implicit1 = B.buildUndef(S64); + auto Implicit2 = B.buildUndef(S64); + auto Implicit3 = B.buildUndef(V2S64); + auto Implicit4 = B.buildUndef(V2S64); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*Implicit1, 0, S48)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*Implicit2, 0, S32)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*Implicit3, 0, S48)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*Implicit4, 0, S32)); + + const auto *CheckStr = R"( + CHECK: [[DEF:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF + CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[DEF]] + + CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]]:_(s32), [[DEF1]] + + CHECK: [[DEF:%[0-9]+]]:_(<2 x s48>) = G_IMPLICIT_DEF + CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[DEF]] + + CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BV:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]]:_(s32), [[DEF1]]:_(s32), [[DEF2]]:_(s32), [[DEF3]]:_(s32) + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + } // namespace