diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -814,6 +814,12 @@
     Builder.setInstrAndDebugLoc(MI);
 
+    ArtifactValueFinder Finder(MRI, Builder, LI);
+    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
+      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+      return true;
+    }
+
     if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
       // %0:_(<4 x s16>) = G_FOO
       // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
@@ -837,15 +843,9 @@
         if (ActionStep.TypeIdx == 1)
           return false;
         break;
-      default: {
-        ArtifactValueFinder Finder(MRI, Builder, LI);
-        if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
-          markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
-          return true;
-        }
+      default:
         return false;
       }
-      }
 
       auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
@@ -876,16 +876,7 @@
                                ConvertOp, OpTy, DestTy)) {
       // We might have a chance to combine later by trying to combine
       // unmerge(cast) first
-      if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs))
-        return true;
-
-      // Try using the value finder.
-      ArtifactValueFinder Finder(MRI, Builder, LI);
-      if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
-        markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
-        return true;
-      }
-      return false;
+      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
     }
 
     const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
@@ -252,8 +252,8 @@
     liveins: $x0
     ; CHECK-LABEL: name: test_eve_v4p0
     ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
-    ; CHECK: %idx:_(s64) = G_CONSTANT i64 1
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[DEF]](p0), [[DEF]](p0)
+    ; CHECK: %idx:_(s64) = G_CONSTANT i64 1
     ; CHECK: %eve:_(p0) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x p0>), %idx(s64)
     ; CHECK: $x0 = COPY %eve(p0)
     ; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -315,9 +315,9 @@
     ; CHECK-LABEL: name: store_32xs8
    ; CHECK: liveins: $x0
    ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
-   ; CHECK: %ptr:_(p0) = COPY $x0
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+   ; CHECK: %ptr:_(p0) = COPY $x0
    ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store (<16 x s8>), align 32)
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
@@ -339,9 +339,9 @@
    ; CHECK-LABEL: name: store_16xs16
    ; CHECK: liveins: $x0
    ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; CHECK: %ptr:_(p0) = COPY $x0
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+   ; CHECK: %ptr:_(p0) = COPY $x0
    ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store (<8 x s16>), align 32)
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
@@ -363,9 +363,9 @@
    ; CHECK-LABEL: name: store_8xs32
    ; CHECK: liveins: $x0
    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-   ; CHECK: %ptr:_(p0) = COPY $x0
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+   ; CHECK: %ptr:_(p0) = COPY $x0
    ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store (<4 x s32>), align 32)
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
@@ -737,7 +737,6 @@
    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
    ; CHECK: bb.2:
    ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[BUILD_VECTOR2]](<4 x s32>), %bb.1, [[BUILD_VECTOR]](<4 x s32>), %bb.0
-   ; CHECK: [[PHI1:%[0-9]+]]:_(<4 x s32>) = G_PHI [[BUILD_VECTOR3]](<4 x s32>), %bb.1, [[BUILD_VECTOR1]](<4 x s32>), %bb.0
    ; CHECK: %one:_(s32) = G_CONSTANT i32 1
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: %extract:_(s32) = G_EXTRACT_VECTOR_ELT [[PHI]](<4 x s32>), [[C]](s64)
@@ -781,7 +780,6 @@
    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16)
    ; CHECK: bb.2:
    ; CHECK: [[PHI:%[0-9]+]]:_(<8 x s16>) = G_PHI [[BUILD_VECTOR2]](<8 x s16>), %bb.1, [[BUILD_VECTOR]](<8 x s16>), %bb.0
-   ; CHECK: [[PHI1:%[0-9]+]]:_(<8 x s16>) = G_PHI [[BUILD_VECTOR3]](<8 x s16>), %bb.1, [[BUILD_VECTOR1]](<8 x s16>), %bb.0
    ; CHECK: %one:_(s16) = G_CONSTANT i16 1
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: %extract:_(s16) = G_EXTRACT_VECTOR_ELT [[PHI]](<8 x s16>), [[C]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -262,9 +262,9 @@
    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C1]](s64)
    ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C3]](s64)
-   ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
    ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
    ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
+   ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
    ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C5]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -782,37 +782,49 @@
    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
    ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 16
    ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32)
-   ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-   ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-   ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-   ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR]](s32)
    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
    ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
    ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL]]
-   ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
    ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32)
    ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
-   ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-   ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
-   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+   ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL1]]
-   ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+   ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+   ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+   ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+   ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+   ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[ASHR]](s32)
+   ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+   ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
    ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-   ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-   ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
-   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
-   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL2]]
+   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]]
    ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+   ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32)
+   ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+   ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+   ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+   ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+   ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL3]]
+   ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+   ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+   ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+   ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]]
+   ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+   ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]]
+   ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; VI-LABEL: name: test_ashr_v3s16_v3s16
    ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -837,35 +849,44 @@
    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16)
    ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16)
    ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16)
-   ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-   ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-   ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-   ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16)
    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16)
    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16)
-   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+   ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+   ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+   ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+   ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+   ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16)
+   ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16)
+   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32)
+   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]]
    ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+   ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16)
+   ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+   ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL3]]
+   ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+   ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+   ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]]
+   ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-LABEL: name: test_ashr_v3s16_v3s16
    ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -886,7 +907,12 @@
    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-   ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+   ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+   ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+   ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+   ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
+   ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+   ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32)
    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
@@ -894,16 +920,16 @@
    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
    ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
    ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-   ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
-   ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-   ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
-   ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
-   ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
-   ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
-   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
-   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
-   ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
-   ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+   ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+   ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+   ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+   ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
+   ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+   ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[COPY6]](s32)
+   ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+   ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+   ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+   ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
    %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -384,12 +384,25 @@
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
-   ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+   ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+   ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+   ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+   ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+   ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
-   ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+   ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+   ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+   ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+   ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+   ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+   ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+   ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
    %0:_(s64) = COPY $vgpr0_vgpr1
    %1:_(s48) = G_TRUNC %0
@@ -1531,22 +1544,27 @@
    ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C]]
    ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16)
    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
-   ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-   ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+   ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY7]](s32)
    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
-   ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32)
+   ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C2]](s32)
    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-   ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32)
-   ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL5]]
-   ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-   ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
+   ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32)
+   ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
+   ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+   ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32)
+   ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL6]]
+   ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+   ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
    ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
    %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    %1:_(<6 x s8>) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -373,35 +373,40 @@
    ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
    ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]]
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
-   ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-   ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-   ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-   ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+   ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32)
    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
-   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-   ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-   ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-   ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
-   ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+   ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+   ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+   ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+   ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+   ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+   ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+   ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+   ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+   ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+   ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+   ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+   ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+   ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+   ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; VI-LABEL: name: test_fadd_v3s16
    ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -426,35 +431,40 @@
    ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]]
    ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]]
    ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]]
-   ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-   ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-   ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-   ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+   ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32)
    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
-   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
-   ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+   ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+   ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+   ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+   ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+   ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+   ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+   ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+   ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+   ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+   ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-LABEL: name: test_fadd_v3s16
    ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
@@ -242,8 +242,17 @@
    ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32)
-   ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
+   ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+   ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
+   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+   ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
    ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16)
    ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
@@ -264,8 +273,17 @@
    ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]]
    ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]]
    ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
-   ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE]](s16)
+   ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE1]](s16)
+   ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FCANONICALIZE2]](s16)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+   ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16)
    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE1]](s16)
    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE2]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
@@ -353,8 +353,17 @@
    ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32)
    ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32)
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32)
-   ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
+   ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+   ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
+   ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+   ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
    ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16)
    ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
@@ -382,8 +391,17 @@
    ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
    ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16)
    ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16)
-   ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16)
+   ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16)
+   ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16)
+   ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+   ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -408,11 +426,13 @@
    ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16)
    ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
    ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16)
-   ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
+   ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16)
+   ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+   ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32)
+   ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    %0:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1447,8 +1447,17 @@
    ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1)
    ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32)
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32)
-   ; SI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
+   ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+   ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
+   ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+   ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
    ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16)
    ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
@@ -1493,8 +1502,17 @@
    ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
    ; VI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32)
    ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
-   ; VI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16)
+   ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16)
+   ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+   ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -1539,11 +1557,13 @@
    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
    ; GFX9: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32)
    ; GFX9: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
-   ; GFX9: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
+   ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
+   ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+   ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32)
+   ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16
@@ -1573,11 +1593,13 @@
    ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]]
    ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16)
    ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]]
-   ; GFX9-UNSAFE: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16)
    ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16)
+   ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
    ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16)
+   ; GFX9-UNSAFE: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+   ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32)
+   ; GFX9-UNSAFE: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
    ; GFX9-UNSAFE: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    ; GFX10-LABEL: name: test_fdiv_v3s16
@@ -1619,11 +1641,13 @@
    ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
    ; GFX10: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32)
    ; GFX10: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
-   ; GFX10: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; GFX10: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
    ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
+   ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
    ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
+   ; GFX10: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+   ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32)
+   ; GFX10: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
    ; GFX10: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    %0:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
@@ -342,8 +342,17 @@
    ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]]
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32)
-   ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
+   ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+   ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
+   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+   ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
    ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16)
    ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
@@ -364,8 +373,17 @@
    ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
    ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]]
    ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]]
-   ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16)
+   ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16)
+   ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+   ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+   ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+   ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
+   ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
    ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16)
    ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16)
@@ -386,11 +404,13 @@
    ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
    ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]]
    ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]]
-   ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
    ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16)
+   ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
    ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16)
+   ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+   ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32)
+   ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
    ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    %0:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -437,35 +437,40 @@
    ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16)
    ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]]
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32)
-   ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
-   ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
-   ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
-   ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+   ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY3]](s32)
    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
-   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-   ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-   ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-   ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
-   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
-   ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+   ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+   ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+   ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+   ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+   ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+   ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+   ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+   ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+   ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+   ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+   ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+   ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+   ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+   ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; VI-LABEL: name: test_fma_v3s16
    ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -499,35 +504,40 @@
    ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]]
    ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]]
    ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]]
-   ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
-   ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
-   ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
-   ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16)
    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16)
    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+   ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY3]](s32)
    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16)
-   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
-   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
-   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
-   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
-   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
-   ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+   ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+   ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+   ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+   ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+   ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+   ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+   ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+   ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+   ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+   ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+   ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-LABEL: name: test_fma_v3s16
    ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
@@ -422,35 +422,40 @@
    ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
    ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]]
    ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32)
-   ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-   ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-   ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-   ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+   ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32)
    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
-   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-   ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-   ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-   ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
-   ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+   ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+   ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+   ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+   ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+   ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+   ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+   ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+   ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+   ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+   ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+   ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+   ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+   ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+   ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+   ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+   ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
    ; VI-LABEL: name: test_fmaxnum_v3s16
    ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -481,35 +486,40 @@
    ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
    ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]]
    ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
-   ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-   ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-   ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
-   ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-   ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-   ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-   ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16)
    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16)
    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-   ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+   ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32)
    ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16)
-   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-   ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
-   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+   ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
    ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-   ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
-   ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
-   ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
-   ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
-   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
-   ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-   ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+   ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+   ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+   ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+   ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+   ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+   ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+   ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+   ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+   ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+   ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+   ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+   ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+   ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+   ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) =
G_BITCAST [[OR2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -422,35 +422,40 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: 
[[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -481,35 +486,40 @@ ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = 
G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -360,35 +360,40 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fmul_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -413,35 +418,40 @@ ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]] ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fmul_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -788,35 +788,44 @@ ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) ; SI: 
[[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] - ; SI: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) - ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C4]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32) ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL5]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C4]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT11]], [[SHL6]] + ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C4]] - ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C4]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL5]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; SI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; SI: $vgpr1 = COPY 
[[BITCAST11]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) ; VI-LABEL: name: test_fshl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -871,36 +880,45 @@ ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND5]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR11]] - ; VI: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]] - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR6]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; VI: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) ; GFX9-LABEL: name: test_fshl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -974,27 +992,22 @@ ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR9]] ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) - ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[COPY27]](s32) - ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) ; GFX9: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY28]](s32), [[COPY29]](s32) - ; GFX9: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; GFX9: [[COPY31:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY30]](s32), [[COPY31]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) - ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) + ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -957,39 +957,30 @@ ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: 
[[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>) + ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) ; SI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32) - ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) + ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) - ; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) ; SI: [[COPY47:%[0-9]+]]:_(s32) = COPY [[BITCAST14]](s32) ; SI: [[AND34:%[0-9]+]]:_(s32) = G_AND [[COPY47]], [[C1]] - ; SI: [[COPY48:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; SI: [[COPY48:%[0-9]+]]:_(s32) = COPY [[BITCAST15]](s32) ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[COPY48]], [[C1]] ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32) ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]] - ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; SI: [[COPY49:%[0-9]+]]:_(s32) = COPY [[BITCAST15]](s32) + ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; SI: [[COPY49:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[COPY49]], [[C1]] ; SI: [[COPY50:%[0-9]+]]:_(s32) = COPY [[BITCAST16]](s32) ; SI: [[AND37:%[0-9]+]]:_(s32) = G_AND [[COPY50]], [[C1]] ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32) ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]] - ; SI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; SI: [[COPY51:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) - ; SI: [[AND38:%[0-9]+]]:_(s32) = G_AND [[COPY51]], [[C1]] - ; SI: [[COPY52:%[0-9]+]]:_(s32) = COPY [[BITCAST17]](s32) - ; SI: [[AND39:%[0-9]+]]:_(s32) = G_AND [[COPY52]], [[C1]] - ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C]](s32) - ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND38]], [[SHL20]] - ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32) - ; SI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>) + ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) + ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1126,39 +1117,30 @@ ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>) + ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) ; VI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32) - ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: 
[[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) + ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) - ; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST14]](s32) ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST15]](s32) ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]] - ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST15]](s32) + ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST16]](s32) ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]] ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32) ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]] - ; VI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) - ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]] - ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST17]](s32) - ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C1]] - ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32) - ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL20]] - ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32) - ; VI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>) + ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) + ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1232,27 +1214,22 @@ ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR7]] ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; GFX9: 
[[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) - ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[COPY27]](s32) - ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) ; GFX9: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY28]](s32), [[COPY29]](s32) - ; GFX9: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX9: [[COPY31:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY30]](s32), [[COPY31]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) - ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) + ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -353,8 +353,17 @@ ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) @@ -382,8 +391,17 @@ ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16) - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) @@ -408,11 +426,13 @@ ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) - ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -266,8 +266,17 @@ ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) @@ -288,8 +297,17 @@ ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) @@ -310,11 +328,13 @@ ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -420,35 +420,40 @@ ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - 
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fsub_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -476,35 +481,40 @@ ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
 ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
- ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+ ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+ ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-LABEL: name: test_fsub_v3s16
 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -532,7 +542,12 @@
 ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]]
 ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]]
 ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]]
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+ ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
 ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -541,16 +556,13 @@
 ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
 ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
- ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
- ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY2]](s32)
 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -717,7 +717,6 @@
 ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5)
 ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5)
 ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5)
- ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
@@ -734,6 +733,7 @@
 ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
 ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
 ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
+ ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
 ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir
@@ -690,35 +690,40 @@
 ; GFX6: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16)
 ; GFX6: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]]
 ; GFX6: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32)
- ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
 ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16)
 ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
 ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+ ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32)
 ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16)
- ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
- ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
 ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
- ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
- ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+ ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+ ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+ ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
 ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX8-LABEL: name: test_intrinsic_round_v3s16
 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -762,35 +767,40 @@
 ; GFX8: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]]
 ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]]
 ; GFX8: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
- ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
 ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16)
 ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
 ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
+ ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32)
 ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16)
- ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
- ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
 ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
- ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
- ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX8: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX8: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+ ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX8: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+ ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
 ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-LABEL: name: test_intrinsic_round_v3s16
 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -834,7 +844,12 @@
 ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]]
 ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]]
 ; GFX9: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]]
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+ ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
 ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -843,16 +858,13 @@
 ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
 ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16)
- ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -114,31 +114,36 @@
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
- ; UNPACKED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
- ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+ ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+ ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_v3f16
 ; PACKED: bb.1 (%ir-block.0):
@@ -160,31 +165,22 @@
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -382,32 +378,37 @@
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; UNPACKED: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
- ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+ ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]]
+ ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_tfe_v3f16
 ; PACKED: bb.1 (%ir-block.0):
@@ -433,31 +434,22 @@
 ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -677,29 +669,34 @@
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
- ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+ ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100
 ; PACKED: bb.1 (%ir-block.0):
@@ -720,31 +717,22 @@
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -767,27 +755,31 @@
 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
+ ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY11]](s32)
+ ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000
 ; PACKED: bb.1 (%ir-block.0):
@@ -808,31 +800,22 @@
 ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -1331,30 +1314,35 @@
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
- ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
 ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
 ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
 ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
 ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]]
+ ; UNPACKED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
 ; PACKED: bb.1 (%ir-block.0):
@@ -1379,31 +1367,22 @@
 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1431,28 +1410,32 @@
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
+ ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY11]](s32)
+ ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
 ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
 ; PACKED: bb.1 (%ir-block.0):
@@ -1477,31 +1460,22 @@
 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1529,28 +1503,32 @@
 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
 ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
 ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
+ ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
+ ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
+ ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY11]](s32)
+ ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
+ ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
 ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
- ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32)
+ ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]]
+ ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; UNPACKED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
 ; PACKED: bb.1 (%ir-block.0):
@@ -1575,31 +1553,22 @@
 ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
 ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
+ ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
 ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
- ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; PACKED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
 ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; PACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
 ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
 ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
 ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
- ; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -4450,39 +4450,51 @@
 ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
- ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
 ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; VI-LABEL: name: test_load_constant_v3s16_align4
 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4493,39 +4505,51 @@
 ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
 ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-LABEL: name: test_load_constant_v3s16_align4
 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -4536,7 +4560,12 @@
 ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
 ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -4546,16 +4575,16 @@
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9:
[[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4) @@ -4579,39 +4608,51 @@ ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF + ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -4622,39 +4663,51 @@ ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -4665,7 +4718,12 @@ ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: 
[[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -4675,16 +4733,16 @@ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 4) @@ -4741,36 +4799,45 @@ ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32) + ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: 
test_load_constant_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -4808,36 +4875,45 @@ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], 
[[SHL6]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -4875,7 +4951,12 @@ ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -4885,16 +4966,16 @@ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32) + ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD 
%0 :: (load (<3 x s16>), align 1, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -4468,39 +4468,51 @@ ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -4511,39 +4523,51 @@ ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; VI: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -4554,7 +4578,12 @@ ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + 
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -4564,16 +4593,16 @@ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0) @@ -4597,39 +4626,51 @@ ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CI: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), 
[[BITCAST6]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -4640,39 +4681,51 @@ ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_load_flat_v3s16_align2
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4683,7 +4736,12 @@
; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -4693,16 +4751,16 @@
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 0)
@@ -4759,36 +4817,45 @@
; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
- ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
- ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+ ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+ ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+ ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32)
+ ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]]
; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]]
+ ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]]
+ ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]]
+ ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
+ ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+ ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_load_flat_v3s16_align1
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4826,36 +4893,45 @@
; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
- ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
- ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
- ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
- ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C5]](s32)
+ ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]]
; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+ ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_load_flat_v3s16_align1
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -4893,7 +4969,12 @@
; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+ ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -4903,16 +4984,16 @@
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
- ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
- ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+ ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32)
+ ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -7576,39 +7576,51 @@
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-HSA-LABEL: name: test_load_global_v3s16_align4
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7619,39 +7631,51 @@
; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-MESA-LABEL: name: test_load_global_v3s16_align4
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7662,39 +7686,51 @@
; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_load_global_v3s16_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7705,39 +7741,51 @@
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7748,7 +7796,12 @@
; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -7758,16 +7811,16 @@
; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7778,7 +7831,12 @@
; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -7788,16 +7846,16 @@
; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1)
@@ -7821,39 +7879,51 @@
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-HSA-LABEL: name: test_load_global_v3s16_align2
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7864,39 +7934,51 @@
; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-MESA-LABEL: name: test_load_global_v3s16_align2
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7907,39 +7989,51 @@
; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_load_global_v3s16_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7950,39 +8044,51 @@
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -7993,7 +8099,12 @@
; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -8003,16 +8114,16 @@
; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8023,7 +8134,12 @@
; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -8033,16 +8149,16 @@
; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 1)
@@ -8099,36 +8215,45 @@
; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
- ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+ ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32)
+ ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]]
; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]]
+ ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]]
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
+ ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+ ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-HSA-LABEL: name: test_load_global_v3s16_align1
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8139,39 +8264,51 @@
; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ;
CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] - ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] - ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] - ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] - ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY 
[[LSHR]](s32) + ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32) + ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8215,36 +8352,45 @@ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32) + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV1]](<2 x s16>) + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8282,36 +8428,45 @@ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LSHR]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8322,7 +8477,12 @@ ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -8332,16 +8492,16 @@ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; GFX9-HSA: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8379,7 +8539,12 @@ ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -8389,16 +8554,16 @@ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-MESA: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT 
[[OR2]](s16) + ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32) + ; GFX9-MESA: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 1) @@ -9139,13 +9304,6 @@ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -9153,24 +9311,50 @@ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) 
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -9186,39 +9370,58 @@ ; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-HSA: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] - ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] - ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32) + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]] + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]] + ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]] + ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]] + ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) + ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]] + ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; 
CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] + ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) + ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -9234,39 +9437,58 @@ ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) 
= G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]] + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]] + ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]] + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]] + ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -9282,39 +9504,58 @@ ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] ; VI: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]] + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], 
[[SHL5]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -9330,14 +9571,6 @@ ; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -9345,11 +9578,27 @@ ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-HSA: $vgpr0 = COPY 
[[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -9365,14 +9614,6 @@ ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -9380,11 +9621,27 @@ ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-MESA: $vgpr0 = COPY 
[[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 8, addrspace 1)
%2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -9414,13 +9671,6 @@
; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
@@ -9428,24 +9678,50 @@
; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+ ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32)
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]]
+ ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]]
+ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32)
+ ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
; CI-HSA-LABEL: name: test_load_global_v5s16_align4
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -9461,39 +9737,58 @@
; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; CI-MESA-LABEL: name: test_load_global_v5s16_align4
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -9509,39 +9804,58 @@
; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; VI-LABEL: name: test_load_global_v5s16_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -9557,39 +9871,58 @@
; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -9605,14 +9938,6 @@
; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -9620,11 +9945,27 @@
; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32)
+ ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+ ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -9640,14 +9981,6 @@
; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -9655,11 +9988,27 @@
; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32)
+ ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 4, addrspace 1)
%2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -9692,39 +10041,58 @@
; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; CI-HSA-LABEL: name: test_load_global_v5s16_align2
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9740,39 +10108,58 @@
; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; CI-MESA-LABEL: name: test_load_global_v5s16_align2
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9788,39 +10175,58 @@
; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; VI-LABEL: name: test_load_global_v5s16_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9836,39 +10242,58 @@
; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9884,14 +10309,6 @@
; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -9899,11 +10316,27 @@
; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32)
+ ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+ ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -9919,14 +10352,6 @@
; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -9934,11 +10359,27 @@
; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32)
+ ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
+ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 2, addrspace 1)
%2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -10023,34 +10464,48 @@
; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C7]](s32)
; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
- ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
- ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
- ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C7]](s32)
+ ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C7]](s32)
+ ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL8]]
+ ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C7]](s32)
+ ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL9]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]]
+ ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL10]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; SI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; SI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; CI-HSA-LABEL: name: test_load_global_v5s16_align1
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
@@ -10066,39 +10521,58 @@
; CI-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
- ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]]
+ ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]]
+ ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+ ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C5]](s32)
+ ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; CI-MESA-LABEL: name: test_load_global_v5s16_align1
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -10166,34 +10640,48 @@
; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32)
; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]]
- ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C7]](s32)
; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32)
; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C7]](s32)
; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
- ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
- ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
- ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
- ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C7]](s32)
+ ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C7]](s32)
+ ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL8]]
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C7]](s32)
+ ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL9]]
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]]
+ ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL10]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; VI-LABEL: name: test_load_global_v5s16_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -10251,34 +10739,48 @@
; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]]
; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16)
; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL5]]
- ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32)
; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL6]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
- ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
- ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL7]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+ ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C6]](s32)
+ ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL8]]
+ ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C6]](s32)
+ ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL9]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C8]]
+ ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C6]](s32)
+ ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL10]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST6]](<2 x s16>)
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
@@ -10294,14 +10796,6 @@
; GFX9-HSA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GFX9-HSA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; GFX9-HSA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>)
- ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -10309,11 +10803,27 @@
; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = 
G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -10370,15 +10880,7 @@ ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) @@ -10386,11 +10888,27 @@ ; GFX9-MESA: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX9-MESA: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; 
GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<6 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-MESA: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[ANYEXT6]](s32) + ; GFX9-MESA: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9-MESA: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT7]](s32), [[ANYEXT8]](s32) + ; GFX9-MESA: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[COPY1]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT9]](s32), [[COPY1]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 1, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF @@ -11373,47 +11891,73 @@ ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) 
- ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + 
; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]] + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) + ; SI: $vgpr3 = COPY [[BITCAST8]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -11435,47 +11979,73 @@ ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] - ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] - ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]] + ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-HSA: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]] + ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]] + ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32) + ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]] + ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]] + ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32) + ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]] + ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]] + ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32) + ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]] + ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]] + ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32) + ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; CI-HSA: 
[[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI-HSA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; CI-HSA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) + ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) + ; CI-HSA: $vgpr3 = COPY [[BITCAST8]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -11497,47 +12067,73 @@ ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] - ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; 
CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32) + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]] + ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]] + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]] + ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]] + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]] + ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]] + ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) + ; CI-MESA: $vgpr3 = COPY [[BITCAST8]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -11559,47 +12155,73 @@ ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], 
[[SHL3]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]] + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]] + ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]] + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]] + ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]] + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) + ; VI: $vgpr3 = COPY [[BITCAST8]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -11621,14 +12243,6 @@ ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; GFX9-HSA: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -11639,12 +12253,31 @@ ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 8, addrspace 1) @@ -11666,14 +12299,6 @@ ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = 
G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -11684,12 +12309,31 @@ ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 8, addrspace 1) %2:_(<7 x 
s16>) = G_IMPLICIT_DEF @@ -11729,47 +12373,73 @@ ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI: $vgpr0 = COPY 
[[BITCAST1]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]] + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]] + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]] + ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) + ; SI: $vgpr3 = COPY [[BITCAST8]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) @@ -11791,47 +12461,73 @@ ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
 ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
 ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
 ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+ ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+ ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+ ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
+ ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32)
+ ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+ ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+ ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+ ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+ ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+ ; CI-HSA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+ ; CI-HSA: $vgpr3 = COPY [[BITCAST8]](<2 x s16>)
 ; CI-MESA-LABEL: name: test_load_global_v7s16_align4
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -11853,47 +12549,73 @@
 ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
 ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
- ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
 ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
- ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
 ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-MESA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-MESA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; CI-MESA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+ ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+ ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
+ ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32)
+ ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-MESA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+ ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+ ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-MESA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+ ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
+ ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32)
+ ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+ ; CI-MESA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+ ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-MESA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+ ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+ ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+ ; CI-MESA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-MESA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; CI-MESA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+ ; CI-MESA: $vgpr3 = COPY [[BITCAST8]](<2 x s16>)
 ; VI-LABEL: name: test_load_global_v7s16_align4
 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -11915,47 +12637,73 @@
 ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
 ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
 ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; VI: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+ ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+ ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+ ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
+ ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32)
+ ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+ ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; VI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+ ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; VI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+ ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+ ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+ ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+ ; VI: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI: $vgpr3 = COPY [[BITCAST8]](<2 x s16>)
 ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4
 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -11977,14 +12725,6 @@
 ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
 ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
 ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -11995,12 +12735,31 @@
 ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
 ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
 ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
- ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[DEF]](s32)
+ ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+ ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+ ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
 ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -12022,14 +12781,6 @@
 ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
 ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
 ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -12040,12 +12791,31 @@
 ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
 ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
 ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
- ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[DEF]](s32)
+ ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+ ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+ ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+ ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 4, addrspace 1)
 %2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -12242,47 +13012,73 @@
 ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
 ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
 ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
 ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+ ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+ ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+ ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
+ ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32)
+ ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+ ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+ ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+ ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+ ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+ ; CI-HSA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+ ; CI-HSA: $vgpr3 = COPY [[BITCAST8]](<2 x s16>)
 ; CI-MESA-LABEL: name: test_load_global_v7s16_align2
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -12618,14 +13414,6 @@
 ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
 ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
 ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -12636,12 +13424,31 @@
 ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
 ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
 ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
- ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[DEF]](s32)
+ ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+ ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+ ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
 ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -12995,47 +13802,73 @@
 ; CI-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; CI-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; CI-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
- ; CI-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
 ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
- ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+ ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; CI-HSA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
 ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
- ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C6]]
+ ; CI-HSA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-HSA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
+ ; CI-HSA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
 ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+ ; CI-HSA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
 ; CI-HSA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
 ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+ ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; CI-HSA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CI-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; CI-HSA: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C7]](s32)
+ ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-HSA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C6]]
+ ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-HSA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C6]]
+ ; CI-HSA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32)
+ ; CI-HSA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-HSA: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-HSA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]]
+ ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; CI-HSA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C6]]
+ ; CI-HSA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C7]](s32)
+ ; CI-HSA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CI-HSA: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; CI-HSA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C6]]
+ ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; CI-HSA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C6]]
+ ; CI-HSA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s32)
+ ; CI-HSA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+ ; CI-HSA: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; CI-HSA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C6]]
+ ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; CI-HSA: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C6]]
+ ; CI-HSA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C7]](s32)
+ ; CI-HSA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL7]]
+ ; CI-HSA: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-HSA: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
+ ; CI-HSA: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
+ ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>)
+ ; CI-HSA: $vgpr3 = COPY [[BITCAST8]](<2 x s16>)
 ; CI-MESA-LABEL: name: test_load_global_v7s16_align1
 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -13473,14 +14306,6 @@
 ; GFX9-HSA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
 ; GFX9-HSA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
 ; GFX9-HSA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
- ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
 ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
 ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -13491,12 +14316,31 @@
 ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
 ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
 ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
- ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[DEF]](s32)
+ ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[DEF3:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-HSA: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
+ ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-HSA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-HSA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+ ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+ ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
 ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1
 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -10346,39 +10346,51 @@
 ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
 ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
 ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; CI-LABEL: name: test_load_local_v3s16_align2
 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -10389,39 +10401,51 @@
 ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
 ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
 ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; CI-DS128-LABEL: name: test_load_local_v3s16_align2
 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -10432,39 +10456,51 @@
 ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
 ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI-DS128: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-DS128: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
 ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-DS128: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
 ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI-DS128: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CI-DS128: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; VI-LABEL: name: test_load_local_v3s16_align2
 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -10475,39 +10511,51 @@
 ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
 ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
 ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-LABEL: name: test_load_local_v3s16_align2
 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -10518,7 +10566,12 @@
 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
 ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
 ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -10528,16 +10581,16 @@
 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2
 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -10548,7 +10601,12 @@
 ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
 ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
 ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
 ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
 ; GFX9-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -10558,16 +10616,16 @@
 ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
 ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
 ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX10-LABEL: name: test_load_local_v3s16_align2
 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -10578,7 -10636,12 @@
 ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; GFX10:
[[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -10588,16 +10651,16 @@ ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -10608,7 +10671,12 @@ ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), 
[[COPY2]](s32) + ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -10618,16 +10686,16 @@ ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 3) @@ -10684,36 +10752,45 @@ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -10757,36 +10834,45 @@ ; CI: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32) + ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = 
COPY [[LSHR]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -10830,36 +10916,45 @@ ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI-DS128: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) - ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] - ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32) ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] - ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] - ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-DS128: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI-DS128: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32) + ; CI-DS128: [[BITCAST3:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV1]](<2 x s16>) + ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; CI-DS128: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]] + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; CI-DS128: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]] + ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]] + ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; CI-DS128: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -10897,36 +10992,45 @@ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]] + ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -10964,7 +11068,12 @@ ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -10974,16 +11083,16 @@ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32) + ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -10994,7 +11103,12 @@ ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) - ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -11004,16 +11118,16 @@ ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-LABEL: name: test_load_local_v3s16_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -11051,7 +11165,12 @@ ; GFX10: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX10: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -11061,16 +11180,16 @@ ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX10: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32) + ; GFX10: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -11108,7 +11227,12 @@ ; GFX10-UNALIGNED: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10-UNALIGNED: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -11118,16 +11242,16 @@ ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX10-UNALIGNED: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX10-UNALIGNED: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX10-UNALIGNED: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX10-UNALIGNED: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32) + ; GFX10-UNALIGNED: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32) ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), 
[[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -5930,15 +5930,6 @@ ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -5946,22 +5937,43 @@ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) 
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5972,15 +5984,6 @@ ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -5988,22 +5991,43 @@ ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] - ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI: 
[[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -6014,15 +6038,6 @@ ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -6030,22 +6045,43 @@ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) 
= G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_load_private_v3s16_align8
; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6056,7 +6092,12 @@
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6065,16 +6106,16 @@
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
- ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p5) = COPY $vgpr0
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5)
@@ -6098,39 +6139,51 @@
; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-LABEL: name: test_load_private_v3s16_align2
; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6141,39 +6194,51 @@
; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+ ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_load_private_v3s16_align2
; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6184,39 +6249,51 @@
; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C3]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_load_private_v3s16_align2
; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6227,7 +6304,12 @@
; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32)
; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6237,16 +6319,16 @@
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
- ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p5) = COPY $vgpr0
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 5)
@@ -6303,36 +6385,45 @@
; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
- ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+ ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+ ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+ ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32)
+ ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]]
; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]]
+ ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]]
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]]
+ ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
+ ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+ ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CI-LABEL: name: test_load_private_v3s16_align1
; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6376,36 +6467,45 @@
; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32)
; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
- ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
- ; CI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32)
; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; CI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
- ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C7]], [[C6]](s32)
; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; CI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
- ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
- ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
- ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+ ; CI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C6]](s32)
+ ; CI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C6]](s32)
+ ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C6]](s32)
+ ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]]
; CI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]]
+ ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
+ ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]]
+ ; CI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C8]]
+ ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
+ ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+ ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; CI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_load_private_v3s16_align1
; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6443,36 +6543,45 @@
; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
- ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
- ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
- ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
- ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]]
+ ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C5]](s32)
+ ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C5]](s32)
+ ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C5]](s32)
+ ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]]
; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+ ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32)
+ ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+ ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+ ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C5]](s32)
+ ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL7]]
+ ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_load_private_v3s16_align1
; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -6510,7 +6619,12 @@
; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+ ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32)
; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -6520,16 +6634,16 @@
; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
- ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
- ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
+ ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT3]](s32), [[ANYEXT4]](s32)
+ ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT5]](s32), [[COPY1]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p5) = COPY $vgpr0
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -787,37 +787,49 @@
; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]]
; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32)
- ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
- ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
- ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
- ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]]
- ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+ ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]]
+ ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
- ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
- ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
- ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL2]]
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL2]]
; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+ ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
+ ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
+ ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32)
+ ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL3]]
+ ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+ ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]]
+ ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]]
+ ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32)
+ ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL4]]
+ ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_ashr_v3s16_v3s16
; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -842,35 +854,44 @@
; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16)
; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16)
; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16)
- ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16)
; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16)
; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR6]](s16)
- ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16)
+ ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR5]](s16)
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]]
; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR6]](s16)
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL3]]
+ ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]]
+ ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_ashr_v3s16_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -891,7 +912,12 @@
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+ ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32)
; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
@@ -899,16 +925,16 @@
; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
- ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32)
- ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[COPY6]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
@@ -231,89 +231,101 @@
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY7]]
- ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
- ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 16
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 16
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]]
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
+ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]]
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32)
+ ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 16
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 16
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
- ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 16
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
- ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 16
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
+ ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 16
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY15]], 16
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
- ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 16
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 16
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
+ ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY16]], 16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY17]], 16
; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]]
; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
- ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
- ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 16
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY15]]
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
- ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY16]], 16
- ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY17]]
- ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
- ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY18]], 16
- ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]]
+ ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+ ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+ ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
+ ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY18]], 16
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY19]]
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32)
+ ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY20]], 16
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY21]]
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
+ ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY22]], 16
+ ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C2]]
; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]]
; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]]
; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
- ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
- ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]]
- ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C2]]
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
- ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]]
- ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32)
- ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C2]]
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
- ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C2]]
- ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32)
- ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C2]]
- ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CHECK: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+ ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C1]]
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32)
+ ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C1]]
+ ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32)
+ ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C1]]
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32)
+ ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C1]]
+ ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
+ ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C1]]
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32)
+ ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C1]]
+ ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CHECK: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
- ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]]
- ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
- ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]]
- ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
- ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]]
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32)
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+ ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]]
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+ ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]]
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32)
+ ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]]
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
@@ -513,15 +513,6 @@
; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]]
; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]]
; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32)
- ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR]](s32)
; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]]
@@ -529,22 +520,36 @@
; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32)
- ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
- ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
- ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY9]](s32)
+ ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32)
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+ ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+ ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32)
; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
- ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
- ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
- ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
- ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]]
+ ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
+ ; GFX6: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]]
+ ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]]
+ ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
+ ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: saddsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -589,35 +594,40 @@
; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]]
; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]]
; GFX8: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]]
- ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32)
; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16)
- ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
- ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
- ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
- ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
- ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
- ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+ ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+ ; GFX8: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+ ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+ ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: saddsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -1304,35 +1304,44 @@
; CHECK: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC3]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC4]]
; CHECK: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC5]]
- ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
- ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16)
; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16)
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
- ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]]
- ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
+ ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ;
CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]] ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL3]] + ; CHECK: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]] + ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -763,35 +763,44 @@ ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C1]](s16) ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[C1]](s16) - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 65535 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16) + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16) + ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX8: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX8: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL7]] + ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX6-LABEL: name: test_sext_inreg_v3s16_1 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -807,15 +816,6 @@ ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 1 ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 1 - ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] @@ -823,22 +823,43 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32) ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] - ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -629,37 +629,49 @@ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL3]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL4]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - 
; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C1]] + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]] + ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -684,35 +696,44 @@ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; VI: 
[[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL5]] ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL6]] + ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL7]] + ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -733,7 +754,12 @@ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32) ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) @@ -741,16 +767,16 @@ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY4]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[COPY6]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -378,12 +378,25 @@ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMAX2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SMAX2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF @@ -409,8 +422,17 @@ ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -378,12 +378,25 @@ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMIN2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SMIN2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -409,8 +422,17 @@ ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -563,37 +563,42 @@ ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] - ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY9]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] - ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; 
GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL8]] + ; GFX6: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX6: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL9]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -635,35 +640,40 @@ ; GFX8: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX8: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32) ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR1]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX8: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -705,7 +715,12 @@ ; GFX9: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX9: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -714,16 +729,13 @@ ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -231,89 +231,101 @@ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY6]], [[COPY7]] - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 16 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 16 + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: 
[[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 16 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 16 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 16 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 16 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 16 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY15]], 16 ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 16 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 16 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY16]], 16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY17]], 16 ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY14]], 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY15]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY16]], 16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY17]] - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY18]], 16 - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] + ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK: 
[[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY18]], 16 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY19]] + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY20]], 16 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY21]] + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY22]], 16 + ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C2]] ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C2]] - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]] - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C2]] - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; CHECK: 
[[COPY23:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C1]] + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C1]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[SUB2]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C1]] + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C1]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C1]] + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C1]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C3]] - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C3]] - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C3]] + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C3]] + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C3]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -513,15 +513,6 @@ ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF2]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ASHR]](s32) ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]] @@ -529,22 +520,36 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]] ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]] - ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY9]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32) ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] - ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] + ; GFX6: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C4]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -589,35 +594,40 @@ ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] ; GFX8: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32) ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], 
[[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; GFX8: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -195,56 +195,65 @@ ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC6]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC7]] ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC8]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) 
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]] ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL3]] + ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]] + ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), 
[[BITCAST12]](<2 x s16>) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -404,15 +404,6 @@ ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] @@ -420,22 +411,36 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] - ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY9]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] - ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C2]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] + ; GFX6: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -459,35 +464,40 @@ ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32) ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[C1]], [[C]](s32) ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]] + ; GFX8: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -381,12 +381,24 @@ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMAX2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UMAX2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -412,8 +424,17 @@ ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -381,12 +381,24 @@ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; SI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMIN2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] + ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[UMIN2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -412,8 +424,17 @@ ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -483,37 +483,42 @@ ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX6: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] - ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY9]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL 
[[C3]], [[C]](s32) ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] - ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL8]] + ; GFX6: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL9]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -547,35 +552,40 @@ ; GFX8: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] 
- ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32) ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX8: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]] + ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -609,7 +619,12 @@ ; GFX9: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -618,16 +633,13 @@ ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -199,64 +199,73 @@ ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC3]] ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC4]] ; CHECK: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x 
s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s16), [[TRUNC9]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC7]](s16), [[TRUNC10]] ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC11]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL2]] ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL3]] + ; CHECK: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL4]] + ; CHECK: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]] + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir 
@@ -388,15 +388,6 @@ ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX6: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] @@ -404,22 +395,36 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] - ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY9]](s32) + ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] - ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX6: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX6: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] + ; GFX6: 
[[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]] + ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] + ; GFX6: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -443,35 +448,40 @@ ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX8: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY1]](s32) ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8: [[BITCAST6:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX8: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST6]](s32)
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+ ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL2]]
+ ; GFX8: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+ ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32)
+ ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+ ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL3]]
+ ; GFX8: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
 ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ; GFX9-LABEL: name: usubsat_v3s16
 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -535,10 +535,7 @@
 ; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
-; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0
 ; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
-; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX8-PACKED-NEXT: ; return to shader part epilog
 ;
 ; GFX9-LABEL: load_1d_v3f16_xyz:
@@ -555,10 +552,7 @@
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
 ; GFX9-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
 ; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
-; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
 ; GFX9-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: load_1d_v3f16_xyz:
@@ -571,15 +565,11 @@
 ; GFX10-NEXT: s_mov_b32 s5, s7
 ; GFX10-NEXT: s_mov_b32 s6, s8
 ; GFX10-NEXT: s_mov_b32 s7, s9
-; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
 ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
+; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0
 ; GFX10-NEXT: ; return to shader part epilog
 %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
 ret <3 x half> %v
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -140,9 +140,9 @@
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT: s_mov_b32 s4, 0xffff
+; GFX8-NEXT: s_mov_b32 s5, s4
 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
 ; GFX8-NEXT: s_and_b32 s2, s0, s4
-; GFX8-NEXT: s_mov_b32 s5, s4
 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT: s_and_b32 s6, s1, s4
 ; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]