diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -821,6 +821,12 @@
     Builder.setInstrAndDebugLoc(MI);
 
+    ArtifactValueFinder Finder(MRI, Builder, LI);
+    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
+      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+      return true;
+    }
+
     if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
       // %0:_(<4 x s16>) = G_FOO
       // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
@@ -844,15 +850,9 @@
         if (ActionStep.TypeIdx == 1)
           return false;
         break;
-      default: {
-        ArtifactValueFinder Finder(MRI, Builder, LI);
-        if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
-          markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
-          return true;
-        }
+      default:
         return false;
       }
-      }
 
       auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
@@ -883,16 +883,7 @@
                                        ConvertOp, OpTy, DestTy)) {
       // We might have a chance to combine later by trying to combine
       // unmerge(cast) first
-      if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs))
-        return true;
-
-      // Try using the value finder.
-      ArtifactValueFinder Finder(MRI, Builder, LI);
-      if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
-        markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
-        return true;
-      }
-      return false;
+      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
     }
 
     const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
@@ -241,8 +241,8 @@
     liveins: $x0
     ; CHECK-LABEL: name: test_eve_v4p0
     ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
-    ; CHECK: %idx:_(s64) = G_CONSTANT i64 1
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[DEF]](p0), [[DEF]](p0)
+    ; CHECK: %idx:_(s64) = G_CONSTANT i64 1
     ; CHECK: %eve:_(p0) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x p0>), %idx(s64)
     ; CHECK: $x0 = COPY %eve(p0)
     ; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -314,9 +314,9 @@
     ; CHECK-LABEL: name: store_32xs8
     ; CHECK: liveins: $x0
     ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
-    ; CHECK: %ptr:_(p0) = COPY $x0
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
     ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK: %ptr:_(p0) = COPY $x0
     ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store (<16 x s8>), align 32)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
@@ -338,9 +338,9 @@
     ; CHECK-LABEL: name: store_16xs16
     ; CHECK: liveins: $x0
     ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK: %ptr:_(p0) = COPY $x0
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
     ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+    ; CHECK: %ptr:_(p0) = COPY $x0
     ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store (<8 x s16>), align 32)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
@@ -362,9 +362,9 @@
     ; CHECK-LABEL: name: store_8xs32
     ; CHECK: liveins: $x0
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: %ptr:_(p0) = COPY $x0
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
     ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CHECK: %ptr:_(p0) = COPY $x0
     ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store (<4 x s32>), align 32)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -254,9 +254,9 @@
     ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64)
     ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64)
     ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64)
-    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
     ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
     ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
     ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
     ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
@@ -846,19 +846,16 @@
     ; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>)
     ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]]
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
-    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
-    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
     ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
-    ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
-    ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
-    ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32)
+    ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>)
+    ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+    ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
@@ -837,31 +837,24 @@
     ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]]
     ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>)
-    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32)
-    ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
+    ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
     ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
-    ; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
+    ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; SI: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
-    ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
+    ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
     ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32)
     ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]]
-    ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
-    ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
+    ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
     ; SI: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
     ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32)
     ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]]
-    ; SI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
-    ; SI: [[AND38:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]]
-    ; SI: [[AND39:%[0-9]+]]:_(s32) = G_AND [[BITCAST17]], [[C1]]
-    ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C]](s32)
-    ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND38]], [[SHL20]]
-    ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
-    ; SI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>)
-    ; SI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>)
-    ; SI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>)
+    ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+    ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
+    ; SI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
+    ; SI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
     ; VI-LABEL: name: test_fshr_v3s16_v3s16
     ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -990,31 +983,24 @@
     ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]]
     ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>)
-    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32)
-    ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
+    ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
     ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
-    ; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
+    ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
     ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
-    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
+    ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
     ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32)
     ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]]
-    ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
-    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
+    ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
+    ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
     ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
     ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32)
     ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]]
-    ; VI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
-    ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]]
-    ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[BITCAST17]], [[C1]]
-    ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32)
-    ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL20]]
-    ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
-    ; VI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>)
-    ; VI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>)
-    ; VI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>)
+    ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
+    ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
+    ; VI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
+    ; VI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
     ; GFX9-LABEL: name: test_fshr_v3s16_v3s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1074,19 +1060,16 @@
     ; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>)
     ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]]
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
-    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
-    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
+    ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
     ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
-    ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
-    ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
-    ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
+    ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+    ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32)
+    ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>)
+    ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+    ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -711,7 +711,6 @@
     ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5)
     ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5)
     ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5)
-    ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
     ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
     ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
@@ -728,6 +727,7 @@
     ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
     ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
     ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
+    ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
     ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -148,25 +148,18 @@
   ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
   ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
   ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
   ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
   ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -397,25 +390,18 @@
   ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
   ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
   ; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+  ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
   ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -634,25 +620,18 @@
   ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
   ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
   ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
+  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
   ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
   ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -710,25 +689,18 @@
   ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
   ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
   ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
+  ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
   ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
   ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %tex
@@ -1212,25 +1184,18 @@
   ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
   ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
+  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
   ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
   ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1298,25 +1263,18 @@
   ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
   ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
+  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
   ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
   ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1384,25 +1342,18 @@
  ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
   ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
-  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
+  ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
   ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-  ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+  ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
   ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+  ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
   ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
   ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-  ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-  ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-  ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-  ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-  ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
-  ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-  ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
-  ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+  ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+  ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+  ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
   %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %tex = extractvalue { <3 x half>, i32 } %res, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -535,10 +535,7 @@
 ; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
-; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0
 ; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
-; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-PACKED-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: load_1d_v3f16_xyz:
@@ -555,10 +552,7 @@
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
 ; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
-; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: load_1d_v3f16_xyz:
@@ -571,15 +565,11 @@
 ; GFX10-NEXT: s_mov_b32 s5, s7
 ; GFX10-NEXT: s_mov_b32 s6, s8
 ; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
+; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0
 ; GFX10-NEXT: ; return to shader part epilog
   %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret <3 x half> %v
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -140,9 +140,9 @@
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_mov_b32 s4, 0xffff
 ; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-NEXT: s_mov_b32 s5, s4
 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX8-NEXT: s_and_b32 s2, s0, s4
-; GFX8-NEXT: s_mov_b32 s5, s4
 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT: s_and_b32 s6, s1, s4
 ; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]
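
For context on the functional change in LegalizationArtifactCombiner.h: ArtifactValueFinder::tryCombineUnmergeDefs, which the old code only reached in the default switch case and after the unmerge(cast) attempt, looks through chains of merge-like artifacts (G_CONCAT_VECTORS, G_BUILD_VECTOR, G_MERGE_VALUES, nested unmerges) to find the value each G_UNMERGE_VALUES result actually comes from, and the patch simply runs it first. A minimal MIR sketch of the kind of rewrite it performs, with illustrative registers that are not taken from the tests above:

  ; Before: the unmerge reads straight out of a concat.
  %cat:_(<4 x s16>) = G_CONCAT_VECTORS %a(<2 x s16>), %b(<2 x s16>)
  %lo:_(<2 x s16>), %hi:_(<2 x s16>) = G_UNMERGE_VALUES %cat(<4 x s16>)
  ; After: each result is taken directly from its source, and the unmerge
  ; (plus the now-unused concat) is marked dead via markInstAndDefDead.
  %lo:_(<2 x s16>) = COPY %a(<2 x s16>)
  %hi:_(<2 x s16>) = COPY %b(<2 x s16>)

Running the finder before the GUnmerge and cast special cases is consistent with the CHECK-line changes above, where the extra G_LSHR/G_AND/G_SHL repacking disappears and values such as [[OR]] and [[UV]] are copied to the result registers directly.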