Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -767,6 +767,10 @@ SmallBitVector DeadDefs(NumDefs); for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { Register DefReg = MI.getReg(DefIdx); + if (MRI.use_empty(DefReg)) { + DeadDefs[DefIdx] = true; + continue; + } Register FoundVal = ValueFinder.findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); if (!FoundVal || FoundVal == DefReg) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s + +define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> addrspace(4)* %ptr) { +; GFX10-LABEL: value_finder_bug: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: global_load_dwordx4 v[1:4], v[1:2], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GFX10-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %vec = load <4 x float>, <4 x float> addrspace(4)* %ptr, align 4 + %vec.3 = extractelement <4 x float> %vec, i32 3 + %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef> + %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1 + store <2 x float> %new_vec, <2 x float> addrspace(5)* %store_ptr, align 8 + ret void +} Index: 
llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -0,0 +1,137 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=GFX10 + +--- | + + define void @value_finder_bug() { ret void } + define void @value_finder_bug_before_artifact_combine() { ret void } + define void @value_finder_bug_before_artifact_combine_dbg_use() { ret void } + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "llvm", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) + !1 = !DIFile(filename: "bug-legalization-artifact-combiner-dead-def", directory: "/tmp") + !2 = !{} + !3 = !{i32 2, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = distinct !DISubprogram(name: "value_finder_bug_before_artifact_combine_dbg_use", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) + !6 = !DISubroutineType(types: !2) + !7 = !DILocalVariable(name: "in", arg: 1, scope: !5, file: !1, line: 1, type: !8) + !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !9 = !DILocation(line: 1, column: 1, scope: !5) +... 
+ +--- +name: value_finder_bug +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX10-LABEL: name: value_finder_bug + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 + ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32) + ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>) + ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10: G_STORE [[UV1]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + %0:_(p5) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<4 x s32>) = G_IMPLICIT_DEF + %5:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4) + %6:_(s32) = G_CONSTANT i32 3 + %7:_(s32) = G_EXTRACT_VECTOR_ELT %5(<4 x s32>), %6(s32) + %8:_(<2 x s32>) = G_SHUFFLE_VECTOR %5(<4 x s32>), %4, shufflemask(2, undef) + %9:_(s32) = G_CONSTANT i32 1 + %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %8, %7(s32), %9(s32) + G_STORE %10(<2 x s32>), %0(p5) :: (store (<2 x s32>), addrspace 5) +... 
+ +--- +name: value_finder_bug_before_artifact_combine +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 + ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32) + ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32 + ; GFX10: %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>) + ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + %0:_(p5) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4) + %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96 + %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64 + %7:_(s32) = G_IMPLICIT_DEF + %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32) + %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32 + %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>) + G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5) + %12:_(s32) = G_CONSTANT i32 4 + %13:_(p5) = G_PTR_ADD %0, %12(s32) + G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5) + +... 
+ +--- +name: value_finder_bug_before_artifact_combine_dbg_use +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine_dbg_use + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) + ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 + ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32) + ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32 + ; GFX10: %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>) + ; GFX10: DBG_VALUE [[EXTRACT1]](s32), $noreg, <0xb6ade0>, !DIExpression(), debug-location !DILocation(line: 1, column: 1, scope: <0xb6aa70>) + ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + %0:_(p5) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32) + %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4) + %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96 + %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64 + %7:_(s32) = G_IMPLICIT_DEF + %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32) + %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32 + %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>) + DBG_VALUE %dbg_use(s32), $noreg, !7, !DIExpression(), debug-location !9 + 
G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5) + %12:_(s32) = G_CONSTANT i32 4 + %13:_(p5) = G_PTR_ADD %0, %12(s32) + G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5) +...