diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -876,6 +876,31 @@ return true; } + // Recognize UnmergeSrc that can be unmerged to DstTy directly. + // Types have to be either both vector or both non-vector types. + // Merge-like opcodes are combined one at the time. First one creates new + // unmerge, following should use the same unmerge (builder performs CSE). + // + // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) + // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1 + // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 + // + // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc + if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && + (Elt0UnmergeIdx % NumMIElts == 0) && + getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { + if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, + EltSize)) + return false; + MIB.setInstrAndDebugLoc(MI); + auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg()); + unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits(); + replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB, + UpdatedDefs, Observer); + DeadInsts.push_back(&MI); + return true; + } + return false; } }; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -66,8 +66,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) @@ -324,9 +322,9 @@ ; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[AND63]], [[C7]](s64) ; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]] ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[OR55]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8) - ; CHECK-NEXT: $x0 = COPY [[MV]](s64) - ; CHECK-NEXT: $x1 = COPY [[MV1]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8) + ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) + ; CHECK-NEXT: $x1 = COPY [[MV]](s64) %0:_(s64) = COPY $x0 %1:_(s64) = COPY $x1 %2:_(s64) = COPY $x2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -232,29 +232,24 @@ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C2]](s64) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[C2]](s64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C3]](s64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UADDO]], [[C3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C2]](s64) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C4]](s64) ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s32) = G_UADDO [[ASHR1]], [[C]] - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[MV6]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[UADDO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -232,29 +232,24 @@ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]] ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBO]](s64) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C2]](s64) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[C2]](s64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C3]](s64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[USUBO]], [[C3]](s64) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C2]](s64) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C4]](s64) ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[ASHR1]], [[C]] - ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[MV6]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[USUBO]] ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir @@ -132,11 +132,9 @@ ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[UV]](<2 x s32>) + ; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[UV1]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0(<4 x s32>) %5:_(<2 x s32>) = G_BUILD_VECTOR %1, %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -9,46 +9,55 @@ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GCN-NEXT: v_lshlrev_b32_e32 v64, 8, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] -; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16 -; GCN-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32 -; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 -; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 -; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 -; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 -; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 -; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 -; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 -; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 -; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:192 -; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:208 -; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:224 -; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:240 -; GCN-NEXT: s_waitcnt vmcnt(6) +; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] +; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:16 +; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:32 +; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:48 +; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:64 +; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:80 +; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:96 +; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:112 +; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:128 +; GCN-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160 +; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176 +; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192 +; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208 +; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224 +; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240 +; GCN-NEXT: s_waitcnt vmcnt(15) ; GCN-NEXT: v_mov_b32_e32 v5, 0x3e7 -; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3] offset:128 ; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:160 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:176 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:192 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:208 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:224 -; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] -; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 -; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 -; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:48 -; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:64 -; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 -; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 -; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 -; GCN-NEXT: s_waitcnt vmcnt(15) -; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:240 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3] +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:128 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:176 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:192 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:208 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:224 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:240 ; GCN-NEXT: s_endpgm ; ; GFX10-LABEL: v_insert_v64i32_37: @@ -57,46 +66,40 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v64, 8, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0xf -; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] -; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 -; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] +; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:112 +; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:160 +; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:176 +; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:192 +; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:208 +; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:224 +; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:240 +; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:160 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:192 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:208 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:224 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:240 -; GFX10-NEXT: s_waitcnt vmcnt(6) +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, 0x3e7 -; GFX10-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3] offset:128 +; GFX10-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3] +; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16 +; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32 +; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48 +; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64 +; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80 +; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96 +; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112 +; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:128 ; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144 -; GFX10-NEXT: s_waitcnt vmcnt(5) -; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:160 -; GFX10-NEXT: s_waitcnt vmcnt(4) -; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:176 -; GFX10-NEXT: s_waitcnt vmcnt(3) -; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:192 -; GFX10-NEXT: s_waitcnt vmcnt(2) -; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:208 -; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] -; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 -; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 -; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:48 -; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:64 -; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 -; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 -; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:224 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:240 +; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:160 +; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:176 +; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:192 +; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:208 +; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:224 +; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:240 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: v_insert_v64i32_37: @@ -104,57 +107,48 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_lshlrev_b32_e32 v64, 8, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_clause 0xa -; GFX11-NEXT: global_load_b128 v[0:3], v64, s[0:1] offset:128 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: global_load_b128 v[0:3], v64, s[0:1] +; GFX11-NEXT: global_load_b128 v[8:11], v64, s[0:1] offset:16 +; GFX11-NEXT: global_load_b128 v[12:15], v64, s[0:1] offset:32 +; GFX11-NEXT: global_load_b128 v[16:19], v64, s[0:1] offset:48 +; GFX11-NEXT: global_load_b128 v[20:23], v64, s[0:1] offset:64 +; GFX11-NEXT: global_load_b128 v[24:27], v64, s[0:1] offset:80 +; GFX11-NEXT: global_load_b128 v[28:31], v64, s[0:1] offset:96 +; GFX11-NEXT: global_load_b128 v[32:35], v64, s[0:1] offset:112 +; GFX11-NEXT: global_load_b128 v[36:39], v64, s[0:1] offset:128 ; GFX11-NEXT: global_load_b128 v[4:7], v64, s[0:1] offset:144 -; GFX11-NEXT: global_load_b128 v[8:11], v64, s[0:1] offset:160 -; GFX11-NEXT: global_load_b128 v[32:35], v64, s[0:1] -; GFX11-NEXT: global_load_b128 v[36:39], v64, s[0:1] offset:16 -; GFX11-NEXT: global_load_b128 v[40:43], v64, s[0:1] offset:32 -; GFX11-NEXT: global_load_b128 v[44:47], v64, s[0:1] offset:48 -; GFX11-NEXT: global_load_b128 v[48:51], v64, s[0:1] offset:64 -; GFX11-NEXT: global_load_b128 v[52:55], v64, s[0:1] offset:80 -; GFX11-NEXT: global_load_b128 v[56:59], v64, s[0:1] offset:96 -; GFX11-NEXT: global_load_b128 v[60:63], v64, s[0:1] offset:112 -; GFX11-NEXT: s_waitcnt vmcnt(9) +; GFX11-NEXT: global_load_b128 v[40:43], v64, s[0:1] offset:160 +; GFX11-NEXT: global_load_b128 v[44:47], v64, s[0:1] offset:176 +; GFX11-NEXT: global_load_b128 v[48:51], v64, s[0:1] offset:192 +; GFX11-NEXT: global_load_b128 v[52:55], v64, s[0:1] offset:208 +; GFX11-NEXT: global_load_b128 v[56:59], v64, s[0:1] offset:224 +; GFX11-NEXT: global_load_b128 v[60:63], v64, s[0:1] offset:240 +; GFX11-NEXT: s_waitcnt vmcnt(6) ; GFX11-NEXT: v_mov_b32_e32 v5, 0x3e7 -; GFX11-NEXT: s_clause 0x4 -; GFX11-NEXT: global_load_b128 v[12:15], v64, s[0:1] offset:176 -; GFX11-NEXT: global_load_b128 v[28:31], v64, s[0:1] offset:192 -; GFX11-NEXT: global_load_b128 v[24:27], v64, s[0:1] offset:208 -; GFX11-NEXT: global_load_b128 v[20:23], v64, s[0:1] offset:224 -; GFX11-NEXT: global_load_b128 v[16:19], v64, s[0:1] offset:240 -; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_store_b128 v64, v[0:3], s[2:3] offset:128 +; GFX11-NEXT: s_clause 0x9 +; GFX11-NEXT: global_store_b128 v64, v[0:3], s[2:3] +; GFX11-NEXT: global_store_b128 v64, v[8:11], s[2:3] offset:16 +; GFX11-NEXT: global_store_b128 v64, v[12:15], s[2:3] offset:32 +; GFX11-NEXT: global_store_b128 v64, v[16:19], s[2:3] offset:48 +; GFX11-NEXT: global_store_b128 v64, v[20:23], s[2:3] offset:64 +; GFX11-NEXT: global_store_b128 v64, v[24:27], s[2:3] offset:80 +; GFX11-NEXT: global_store_b128 v64, v[28:31], s[2:3] offset:96 +; GFX11-NEXT: global_store_b128 v64, v[32:35], s[2:3] offset:112 +; GFX11-NEXT: global_store_b128 v64, v[36:39], s[2:3] offset:128 ; GFX11-NEXT: global_store_b128 v64, v[4:7], s[2:3] offset:144 -; GFX11-NEXT: s_waitcnt vmcnt(13) -; GFX11-NEXT: global_store_b128 v64, v[8:11], s[2:3] offset:160 -; GFX11-NEXT: s_waitcnt vmcnt(12) -; GFX11-NEXT: global_store_b128 v64, v[32:35], s[2:3] -; GFX11-NEXT: s_waitcnt vmcnt(11) -; GFX11-NEXT: global_store_b128 v64, v[36:39], s[2:3] offset:16 -; GFX11-NEXT: s_waitcnt vmcnt(10) -; GFX11-NEXT: global_store_b128 v64, v[40:43], s[2:3] offset:32 -; GFX11-NEXT: s_waitcnt vmcnt(9) -; GFX11-NEXT: global_store_b128 v64, v[44:47], s[2:3] offset:48 -; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: global_store_b128 v64, v[48:51], s[2:3] offset:64 -; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: global_store_b128 v64, v[52:55], s[2:3] offset:80 -; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: global_store_b128 v64, v[56:59], s[2:3] offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: global_store_b128 v64, v[60:63], s[2:3] offset:112 +; GFX11-NEXT: global_store_b128 v64, v[40:43], s[2:3] offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: global_store_b128 v64, v[12:15], s[2:3] offset:176 +; GFX11-NEXT: global_store_b128 v64, v[44:47], s[2:3] offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: global_store_b128 v64, v[28:31], s[2:3] offset:192 +; GFX11-NEXT: global_store_b128 v64, v[48:51], s[2:3] offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: global_store_b128 v64, v[24:27], s[2:3] offset:208 +; GFX11-NEXT: global_store_b128 v64, v[52:55], s[2:3] offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: global_store_b128 v64, v[20:23], s[2:3] offset:224 +; GFX11-NEXT: global_store_b128 v64, v[56:59], s[2:3] offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b128 v64, v[16:19], s[2:3] offset:240 +; GFX11-NEXT: global_store_b128 v64, v[60:63], s[2:3] offset:240 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -903,40 +903,32 @@ ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9PLUS-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[UV3]], [[UV9]](<2 x s16>) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[TRUNC8]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC9]](s16), [[TRUNC10]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[TRUNC4]](s16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -372,194 +372,7 @@ ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV5]](s128), 0 ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[COPY16]], [[EXTRACT]] ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND6]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C20]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C5]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C6]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C7]](s16) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C8]](s16) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C9]](s16) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C10]](s16) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C11]](s16) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C12]](s16) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C13]](s16) - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C14]](s16) - ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C15]](s16) - ; CHECK-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C16]](s16) - ; CHECK-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C17]](s16) - ; CHECK-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C18]](s16) - ; CHECK-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C19]](s16) - ; CHECK-NEXT: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C5]](s16) - ; CHECK-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C6]](s16) - ; CHECK-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C7]](s16) - ; CHECK-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C8]](s16) - ; CHECK-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C9]](s16) - ; CHECK-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C10]](s16) - ; CHECK-NEXT: [[LSHR24:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C11]](s16) - ; CHECK-NEXT: [[LSHR25:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C12]](s16) - ; CHECK-NEXT: [[LSHR26:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C13]](s16) - ; CHECK-NEXT: [[LSHR27:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C14]](s16) - ; CHECK-NEXT: [[LSHR28:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C15]](s16) - ; CHECK-NEXT: [[LSHR29:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C16]](s16) - ; CHECK-NEXT: [[LSHR30:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C17]](s16) - ; CHECK-NEXT: [[LSHR31:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C18]](s16) - ; CHECK-NEXT: [[LSHR32:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C19]](s16) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C22]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C22]] - ; CHECK-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C22]](s32) - ; CHECK-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL19]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C22]] - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C23]](s32) - ; CHECK-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C22]] - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C24]](s32) - ; CHECK-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C22]] - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C25]](s32) - ; CHECK-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C22]] - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C26]](s32) - ; CHECK-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C22]] - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C27]](s32) - ; CHECK-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C22]] - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C28]](s32) - ; CHECK-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C22]] - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C29]](s32) - ; CHECK-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C22]] - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C30]](s32) - ; CHECK-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR12]](s16) - ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C22]] - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C31]](s32) - ; CHECK-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C22]] - ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C32]](s32) - ; CHECK-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C22]] - ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C33]](s32) - ; CHECK-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR15]](s16) - ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C22]] - ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C34]](s32) - ; CHECK-NEXT: [[OR31:%[0-9]+]]:_(s32) = G_OR [[OR30]], [[SHL31]] - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C22]] - ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK-NEXT: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C35]](s32) - ; CHECK-NEXT: [[OR32:%[0-9]+]]:_(s32) = G_OR [[OR31]], [[SHL32]] - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], [[C22]] - ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK-NEXT: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C36]](s32) - ; CHECK-NEXT: [[OR33:%[0-9]+]]:_(s32) = G_OR [[OR32]], [[SHL33]] - ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C22]] - ; CHECK-NEXT: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C20]](s32) - ; CHECK-NEXT: [[OR34:%[0-9]+]]:_(s32) = G_OR [[OR33]], [[SHL34]] - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR18]](s16) - ; CHECK-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C22]] - ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C37]](s32) - ; CHECK-NEXT: [[OR35:%[0-9]+]]:_(s32) = G_OR [[OR34]], [[SHL35]] - ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) - ; CHECK-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[ANYEXT16]], [[C22]] - ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK-NEXT: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C38]](s32) - ; CHECK-NEXT: [[OR36:%[0-9]+]]:_(s32) = G_OR [[OR35]], [[SHL36]] - ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) - ; CHECK-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[ANYEXT17]], [[C22]] - ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK-NEXT: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C39]](s32) - ; CHECK-NEXT: [[OR37:%[0-9]+]]:_(s32) = G_OR [[OR36]], [[SHL37]] - ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR21]](s16) - ; CHECK-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[ANYEXT18]], [[C22]] - ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C40]](s32) - ; CHECK-NEXT: [[OR38:%[0-9]+]]:_(s32) = G_OR [[OR37]], [[SHL38]] - ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) - ; CHECK-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[ANYEXT19]], [[C22]] - ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK-NEXT: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C41]](s32) - ; CHECK-NEXT: [[OR39:%[0-9]+]]:_(s32) = G_OR [[OR38]], [[SHL39]] - ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) - ; CHECK-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[ANYEXT20]], [[C22]] - ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CHECK-NEXT: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C42]](s32) - ; CHECK-NEXT: [[OR40:%[0-9]+]]:_(s32) = G_OR [[OR39]], [[SHL40]] - ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR24]](s16) - ; CHECK-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[ANYEXT21]], [[C22]] - ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK-NEXT: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C43]](s32) - ; CHECK-NEXT: [[OR41:%[0-9]+]]:_(s32) = G_OR [[OR40]], [[SHL41]] - ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR25]](s16) - ; CHECK-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[ANYEXT22]], [[C22]] - ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C44]](s32) - ; CHECK-NEXT: [[OR42:%[0-9]+]]:_(s32) = G_OR [[OR41]], [[SHL42]] - ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR26]](s16) - ; CHECK-NEXT: [[AND32:%[0-9]+]]:_(s32) = G_AND [[ANYEXT23]], [[C22]] - ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK-NEXT: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[AND32]], [[C45]](s32) - ; CHECK-NEXT: [[OR43:%[0-9]+]]:_(s32) = G_OR [[OR42]], [[SHL43]] - ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR27]](s16) - ; CHECK-NEXT: [[AND33:%[0-9]+]]:_(s32) = G_AND [[ANYEXT24]], [[C22]] - ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CHECK-NEXT: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C46]](s32) - ; CHECK-NEXT: [[OR44:%[0-9]+]]:_(s32) = G_OR [[OR43]], [[SHL44]] - ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR28]](s16) - ; CHECK-NEXT: [[AND34:%[0-9]+]]:_(s32) = G_AND [[ANYEXT25]], [[C22]] - ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CHECK-NEXT: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[AND34]], [[C47]](s32) - ; CHECK-NEXT: [[OR45:%[0-9]+]]:_(s32) = G_OR [[OR44]], [[SHL45]] - ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR29]](s16) - ; CHECK-NEXT: [[AND35:%[0-9]+]]:_(s32) = G_AND [[ANYEXT26]], [[C22]] - ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C48]](s32) - ; CHECK-NEXT: [[OR46:%[0-9]+]]:_(s32) = G_OR [[OR45]], [[SHL46]] - ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR30]](s16) - ; CHECK-NEXT: [[AND36:%[0-9]+]]:_(s32) = G_AND [[ANYEXT27]], [[C22]] - ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK-NEXT: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[AND36]], [[C49]](s32) - ; CHECK-NEXT: [[OR47:%[0-9]+]]:_(s32) = G_OR [[OR46]], [[SHL47]] - ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR31]](s16) - ; CHECK-NEXT: [[AND37:%[0-9]+]]:_(s32) = G_AND [[ANYEXT28]], [[C22]] - ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C50]](s32) - ; CHECK-NEXT: [[OR48:%[0-9]+]]:_(s32) = G_OR [[OR47]], [[SHL48]] - ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR32]](s16) - ; CHECK-NEXT: [[AND38:%[0-9]+]]:_(s32) = G_AND [[ANYEXT29]], [[C22]] - ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK-NEXT: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[AND38]], [[C51]](s32) - ; CHECK-NEXT: [[OR49:%[0-9]+]]:_(s32) = G_OR [[OR48]], [[SHL49]] - ; CHECK-NEXT: $vgpr0 = COPY [[OR49]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](s32) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s65) = G_TRUNC %0 %2:_(s65) = G_CTPOP %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -555,54 +555,15 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C5]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR6]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL6]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR1]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<8 x s8>) = G_BITCAST %0 @@ -625,31 +586,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 %2:_(<8 x s8>) = G_BITCAST %0 @@ -670,31 +609,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C3]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(<8 x s8>) = G_BITCAST %0 @@ -715,31 +633,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 3 %2:_(<8 x s8>) = G_BITCAST %0 @@ -760,32 +657,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C5]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 4 %2:_(<8 x s8>) = G_BITCAST %0 @@ -806,31 +681,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C3]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 5 %2:_(<8 x s8>) = G_BITCAST %0 @@ -851,31 +705,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C4]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LSHR]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<8 x s8>) = G_BITCAST %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -363,9 +363,8 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[UV]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -399,9 +398,8 @@ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[UV1]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -605,16 +603,7 @@ ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>) %0:_(<4 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -627,16 +616,7 @@ ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -254,102 +254,83 @@ ; SI-LABEL: name: test_fabs_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] + ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]] - ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] + ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FABS %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -497,45 +497,35 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV]], [[UV3]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -320,23 +320,18 @@ ; GFX9-LABEL: name: test_fcanonicalize_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCANONICALIZE %0 %2:_(<3 x s32>) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -569,54 +569,39 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR2]], [[BUILD_VECTOR4]] - ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR1]], [[BUILD_VECTOR3]], [[BUILD_VECTOR5]] - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC9]](s16), [[TRUNC10]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC11]](s16), [[TRUNC12]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC13]](s16), [[TRUNC14]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>), [[BUILD_VECTOR8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -564,49 +564,39 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -564,49 +564,39 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR3]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]] ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -484,45 +484,35 @@ ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV]], [[UV3]] + ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9PLUS-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9PLUS-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -252,83 +252,64 @@ ; SI-LABEL: name: test_fneg_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fneg_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]] - ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fneg_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) - ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]] - ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]] + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FNEG %0 %2:_(<3 x s32>) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir @@ -9,431 +9,11 @@ ; TAHITI-LABEL: name: test_implicit_def_s1025 ; TAHITI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF ; TAHITI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; TAHITI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; TAHITI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; TAHITI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; TAHITI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; TAHITI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; TAHITI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY]](s32) - ; TAHITI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; TAHITI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; TAHITI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY1]](s32) - ; TAHITI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; TAHITI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; TAHITI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY2]](s32) - ; TAHITI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; TAHITI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; TAHITI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) - ; TAHITI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; TAHITI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; TAHITI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY4]](s32) - ; TAHITI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; TAHITI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; TAHITI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY5]](s32) - ; TAHITI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; TAHITI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; TAHITI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; TAHITI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; TAHITI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; TAHITI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY7]](s32) - ; TAHITI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; TAHITI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; TAHITI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY8]](s32) - ; TAHITI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; TAHITI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; TAHITI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY9]](s32) - ; TAHITI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; TAHITI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; TAHITI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY10]](s32) - ; TAHITI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; TAHITI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; TAHITI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY11]](s32) - ; TAHITI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; TAHITI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; TAHITI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY12]](s32) - ; TAHITI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; TAHITI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; TAHITI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY13]](s32) - ; TAHITI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; TAHITI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; TAHITI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; TAHITI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY14]](s32) - ; TAHITI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; TAHITI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY15]](s32) - ; TAHITI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; TAHITI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY16]](s32) - ; TAHITI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; TAHITI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY17]](s32) - ; TAHITI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; TAHITI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY18]](s32) - ; TAHITI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; TAHITI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[COPY19]](s32) - ; TAHITI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; TAHITI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND20]], [[COPY20]](s32) - ; TAHITI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; TAHITI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY21]](s32) - ; TAHITI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; TAHITI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY22]](s32) - ; TAHITI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; TAHITI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[COPY23]](s32) - ; TAHITI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; TAHITI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND24]], [[COPY24]](s32) - ; TAHITI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; TAHITI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[COPY25]](s32) - ; TAHITI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; TAHITI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY26]](s32) - ; TAHITI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; TAHITI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[COPY27]](s32) - ; TAHITI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; TAHITI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY28]](s32) - ; TAHITI-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; TAHITI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; TAHITI-NEXT: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[COPY29]](s32) - ; TAHITI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; TAHITI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; TAHITI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C1]](s32) - ; TAHITI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL]] - ; TAHITI-NEXT: [[AND32:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; TAHITI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND32]], [[C3]](s32) - ; TAHITI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; TAHITI-NEXT: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; TAHITI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C4]](s32) - ; TAHITI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; TAHITI-NEXT: [[AND34:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; TAHITI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND34]], [[C5]](s32) - ; TAHITI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; TAHITI-NEXT: [[AND35:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; TAHITI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C6]](s32) - ; TAHITI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; TAHITI-NEXT: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; TAHITI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND36]], [[C7]](s32) - ; TAHITI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; TAHITI-NEXT: [[AND37:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; TAHITI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C8]](s32) - ; TAHITI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; TAHITI-NEXT: [[AND38:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; TAHITI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND38]], [[C9]](s32) - ; TAHITI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; TAHITI-NEXT: [[AND39:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; TAHITI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C10]](s32) - ; TAHITI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; TAHITI-NEXT: [[AND40:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] - ; TAHITI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND40]], [[C11]](s32) - ; TAHITI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; TAHITI-NEXT: [[AND41:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] - ; TAHITI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND41]], [[C12]](s32) - ; TAHITI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; TAHITI-NEXT: [[AND42:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] - ; TAHITI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND42]], [[C13]](s32) - ; TAHITI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; TAHITI-NEXT: [[AND43:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C1]] - ; TAHITI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND43]], [[C14]](s32) - ; TAHITI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; TAHITI-NEXT: [[AND44:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] - ; TAHITI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND44]], [[C15]](s32) - ; TAHITI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; TAHITI-NEXT: [[AND45:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C1]] - ; TAHITI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND45]], [[C16]](s32) - ; TAHITI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; TAHITI-NEXT: [[AND46:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; TAHITI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND46]], [[C]](s32) - ; TAHITI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]] - ; TAHITI-NEXT: [[AND47:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] - ; TAHITI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; TAHITI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND47]], [[C17]](s32) - ; TAHITI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; TAHITI-NEXT: [[AND48:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]] - ; TAHITI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; TAHITI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND48]], [[C18]](s32) - ; TAHITI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; TAHITI-NEXT: [[AND49:%[0-9]+]]:_(s32) = G_AND [[LSHR18]], [[C1]] - ; TAHITI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; TAHITI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND49]], [[C19]](s32) - ; TAHITI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]] - ; TAHITI-NEXT: [[AND50:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]] - ; TAHITI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; TAHITI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND50]], [[C20]](s32) - ; TAHITI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] - ; TAHITI-NEXT: [[AND51:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C1]] - ; TAHITI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; TAHITI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND51]], [[C21]](s32) - ; TAHITI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] - ; TAHITI-NEXT: [[AND52:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; TAHITI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; TAHITI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND52]], [[C22]](s32) - ; TAHITI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] - ; TAHITI-NEXT: [[AND53:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]] - ; TAHITI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; TAHITI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND53]], [[C23]](s32) - ; TAHITI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] - ; TAHITI-NEXT: [[AND54:%[0-9]+]]:_(s32) = G_AND [[LSHR23]], [[C1]] - ; TAHITI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; TAHITI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND54]], [[C24]](s32) - ; TAHITI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] - ; TAHITI-NEXT: [[AND55:%[0-9]+]]:_(s32) = G_AND [[LSHR24]], [[C1]] - ; TAHITI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; TAHITI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND55]], [[C25]](s32) - ; TAHITI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] - ; TAHITI-NEXT: [[AND56:%[0-9]+]]:_(s32) = G_AND [[LSHR25]], [[C1]] - ; TAHITI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; TAHITI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND56]], [[C26]](s32) - ; TAHITI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] - ; TAHITI-NEXT: [[AND57:%[0-9]+]]:_(s32) = G_AND [[LSHR26]], [[C1]] - ; TAHITI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; TAHITI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND57]], [[C27]](s32) - ; TAHITI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] - ; TAHITI-NEXT: [[AND58:%[0-9]+]]:_(s32) = G_AND [[LSHR27]], [[C1]] - ; TAHITI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; TAHITI-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND58]], [[C28]](s32) - ; TAHITI-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] - ; TAHITI-NEXT: [[AND59:%[0-9]+]]:_(s32) = G_AND [[LSHR28]], [[C1]] - ; TAHITI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; TAHITI-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND59]], [[C29]](s32) - ; TAHITI-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] - ; TAHITI-NEXT: [[AND60:%[0-9]+]]:_(s32) = G_AND [[LSHR29]], [[C1]] - ; TAHITI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; TAHITI-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND60]], [[C30]](s32) - ; TAHITI-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] - ; TAHITI-NEXT: [[AND61:%[0-9]+]]:_(s32) = G_AND [[LSHR30]], [[C1]] - ; TAHITI-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; TAHITI-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND61]], [[C31]](s32) - ; TAHITI-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] - ; TAHITI-NEXT: $vgpr0 = COPY [[OR30]](s32) + ; TAHITI-NEXT: $vgpr0 = COPY [[UV]](s32) ; FIJI-LABEL: name: test_implicit_def_s1025 ; FIJI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF ; FIJI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; FIJI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; FIJI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; FIJI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; FIJI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; FIJI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; FIJI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; FIJI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; FIJI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) - ; FIJI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 - ; FIJI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C3]](s16) - ; FIJI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; FIJI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C4]](s16) - ; FIJI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 - ; FIJI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C5]](s16) - ; FIJI-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 6 - ; FIJI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C6]](s16) - ; FIJI-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 7 - ; FIJI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C7]](s16) - ; FIJI-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; FIJI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C8]](s16) - ; FIJI-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; FIJI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C9]](s16) - ; FIJI-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 - ; FIJI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C10]](s16) - ; FIJI-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 11 - ; FIJI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C11]](s16) - ; FIJI-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 - ; FIJI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C12]](s16) - ; FIJI-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 13 - ; FIJI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C13]](s16) - ; FIJI-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 14 - ; FIJI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C14]](s16) - ; FIJI-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; FIJI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C15]](s16) - ; FIJI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; FIJI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; FIJI-NEXT: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C3]](s16) - ; FIJI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) - ; FIJI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C5]](s16) - ; FIJI-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C6]](s16) - ; FIJI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C7]](s16) - ; FIJI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C8]](s16) - ; FIJI-NEXT: [[LSHR24:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C9]](s16) - ; FIJI-NEXT: [[LSHR25:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C10]](s16) - ; FIJI-NEXT: [[LSHR26:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C11]](s16) - ; FIJI-NEXT: [[LSHR27:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C12]](s16) - ; FIJI-NEXT: [[LSHR28:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C13]](s16) - ; FIJI-NEXT: [[LSHR29:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C14]](s16) - ; FIJI-NEXT: [[LSHR30:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C15]](s16) - ; FIJI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; FIJI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C16]] - ; FIJI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; FIJI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C16]] - ; FIJI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C16]](s32) - ; FIJI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; FIJI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; FIJI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C16]] - ; FIJI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; FIJI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C17]](s32) - ; FIJI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; FIJI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; FIJI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C16]] - ; FIJI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; FIJI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C18]](s32) - ; FIJI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; FIJI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; FIJI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C16]] - ; FIJI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; FIJI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C19]](s32) - ; FIJI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; FIJI-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; FIJI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C16]] - ; FIJI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; FIJI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C20]](s32) - ; FIJI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; FIJI-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; FIJI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C16]] - ; FIJI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; FIJI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C21]](s32) - ; FIJI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; FIJI-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; FIJI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C16]] - ; FIJI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; FIJI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C22]](s32) - ; FIJI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; FIJI-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; FIJI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C16]] - ; FIJI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; FIJI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C23]](s32) - ; FIJI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; FIJI-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; FIJI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C16]] - ; FIJI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; FIJI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C24]](s32) - ; FIJI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; FIJI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; FIJI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C16]] - ; FIJI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; FIJI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C25]](s32) - ; FIJI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; FIJI-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; FIJI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C16]] - ; FIJI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; FIJI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C26]](s32) - ; FIJI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; FIJI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR12]](s16) - ; FIJI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C16]] - ; FIJI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; FIJI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C27]](s32) - ; FIJI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; FIJI-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR13]](s16) - ; FIJI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C16]] - ; FIJI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; FIJI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C28]](s32) - ; FIJI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; FIJI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; FIJI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C16]] - ; FIJI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; FIJI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C29]](s32) - ; FIJI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; FIJI-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR15]](s16) - ; FIJI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], [[C16]] - ; FIJI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; FIJI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C30]](s32) - ; FIJI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] - ; FIJI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C16]] - ; FIJI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C]](s32) - ; FIJI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]] - ; FIJI-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; FIJI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C16]] - ; FIJI-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; FIJI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C31]](s32) - ; FIJI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] - ; FIJI-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; FIJI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[ANYEXT16]], [[C16]] - ; FIJI-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; FIJI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C32]](s32) - ; FIJI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] - ; FIJI-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR18]](s16) - ; FIJI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT17]], [[C16]] - ; FIJI-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; FIJI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C33]](s32) - ; FIJI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]] - ; FIJI-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR19]](s16) - ; FIJI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[ANYEXT18]], [[C16]] - ; FIJI-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; FIJI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C34]](s32) - ; FIJI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] - ; FIJI-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) - ; FIJI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[ANYEXT19]], [[C16]] - ; FIJI-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; FIJI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C35]](s32) - ; FIJI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] - ; FIJI-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR21]](s16) - ; FIJI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[ANYEXT20]], [[C16]] - ; FIJI-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; FIJI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C36]](s32) - ; FIJI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] - ; FIJI-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR22]](s16) - ; FIJI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[ANYEXT21]], [[C16]] - ; FIJI-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; FIJI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C37]](s32) - ; FIJI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] - ; FIJI-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR23]](s16) - ; FIJI-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[ANYEXT22]], [[C16]] - ; FIJI-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; FIJI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C38]](s32) - ; FIJI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] - ; FIJI-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR24]](s16) - ; FIJI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[ANYEXT23]], [[C16]] - ; FIJI-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; FIJI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C39]](s32) - ; FIJI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] - ; FIJI-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR25]](s16) - ; FIJI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[ANYEXT24]], [[C16]] - ; FIJI-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; FIJI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C40]](s32) - ; FIJI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] - ; FIJI-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR26]](s16) - ; FIJI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[ANYEXT25]], [[C16]] - ; FIJI-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; FIJI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C41]](s32) - ; FIJI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] - ; FIJI-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR27]](s16) - ; FIJI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[ANYEXT26]], [[C16]] - ; FIJI-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; FIJI-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C42]](s32) - ; FIJI-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] - ; FIJI-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR28]](s16) - ; FIJI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[ANYEXT27]], [[C16]] - ; FIJI-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; FIJI-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C43]](s32) - ; FIJI-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] - ; FIJI-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR29]](s16) - ; FIJI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[ANYEXT28]], [[C16]] - ; FIJI-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; FIJI-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C44]](s32) - ; FIJI-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] - ; FIJI-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR30]](s16) - ; FIJI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[ANYEXT29]], [[C16]] - ; FIJI-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; FIJI-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C45]](s32) - ; FIJI-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] - ; FIJI-NEXT: $vgpr0 = COPY [[OR30]](s32) + ; FIJI-NEXT: $vgpr0 = COPY [[UV]](s32) %0:_(s1025) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -334,61 +334,55 @@ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[C3]](s32), [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(<4 x s32>), [[UV33:%[0-9]+]]:_(<4 x s32>), [[UV34:%[0-9]+]]:_(<4 x s32>), [[UV35:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(<4 x s32>), [[UV37:%[0-9]+]]:_(<4 x s32>), [[UV38:%[0-9]+]]:_(<4 x s32>), [[UV39:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: G_STORE [[UV32]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: G_STORE [[UV24]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV33]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV34]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV35]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV36]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV37]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV38]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV39]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -864,101 +858,67 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C5]](s32) - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR6]](s32) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[SHL6]](s32) - ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[SHL6]](s32) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL8]], [[C7]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[EVEC]], [[XOR]] - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]] - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR6]](s32), [[LSHR6]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[SHL]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[SHL]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EVEC]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR]](s32), [[LSHR]](s32) ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC]](<2 x s32>) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C4]](s32) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C4]](s32) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C8]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C8]] - ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND12]], [[C1]](s16) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND11]], [[SHL9]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C8]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C8]] - ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND14]], [[C1]](s16) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND13]], [[SHL10]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C8]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C8]] - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND16]], [[C1]](s16) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND15]], [[SHL11]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C8]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C8]] - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND18]], [[C1]](s16) - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND17]], [[SHL12]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]] - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR11]](s32), [[OR12]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C6]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C4]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C5]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C6]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C8]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[SHL3]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C8]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND5]], [[SHL4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND8]], [[C8]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND7]], [[SHL5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND10]], [[C8]](s16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[SHL6]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL8]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR5]](s32), [[OR6]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -150,23 +150,16 @@ ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -713,23 +706,16 @@ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -739,23 +725,16 @@ ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -2351,53 +2351,19 @@ ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 4) %2:_(s16) = G_BITCAST %1 @@ -2416,53 +2382,19 @@ ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 4) %2:_(s16) = G_BITCAST %1 @@ -2487,18 +2419,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2510,17 +2431,7 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2532,17 +2443,7 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 4) %2:_(s16) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -2189,53 +2189,19 @@ ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 0) %2:_(s16) = G_BITCAST %1 @@ -2254,53 +2220,19 @@ ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 0) %2:_(s16) = G_BITCAST %1 @@ -2325,18 +2257,7 @@ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -2348,17 +2269,7 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -2370,17 +2281,7 @@ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 0) %2:_(s16) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -3585,106 +3585,37 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 1) %2:_(s16) = G_BITCAST %1 @@ -3703,106 +3634,37 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_global_v2s8_align2 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 1) %2:_(s16) = G_BITCAST %1 @@ -3827,36 +3689,13 @@ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-HSA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} @@ -3868,18 +3707,7 @@ ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; CI-MESA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; CI-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_global_v2s8_align1 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -3891,34 +3719,13 @@ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-HSA-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-HSA-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-HSA-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-HSA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -3930,17 +3737,7 @@ ; GFX9-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX9-MESA-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] - ; GFX9-MESA-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) - ; GFX9-MESA-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] - ; GFX9-MESA-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 1) %2:_(s16) = G_BITCAST %1 @@ -6543,27 +6340,18 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) + ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -6571,27 +6359,18 @@ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) + ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 16, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF @@ -8269,33 +8048,20 @@ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) + ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) + ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) + ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) + ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} @@ -8303,33 +8069,20 @@ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) + ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) + ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) + ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) + ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) + ; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 16, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -6565,174 +6565,61 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_load_local_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-DS128-LABEL: name: test_load_local_v2s8_align2 ; CI-DS128: liveins: $vgpr0 ; CI-DS128-NEXT: {{ $}} ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-DS128-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-DS128-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-DS128-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-DS128-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-DS128-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-DS128-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_local_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_local_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX9-UNALIGNED: liveins: $vgpr0 ; GFX9-UNALIGNED-NEXT: {{ $}} ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_load_local_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX10-UNALIGNED: liveins: $vgpr0 ; GFX10-UNALIGNED-NEXT: {{ $}} ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX11-LABEL: name: test_load_local_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX11-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX11-UNALIGNED: liveins: $vgpr0 ; GFX11-UNALIGNED-NEXT: {{ $}} ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 3) %2:_(s16) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4528,105 +4528,37 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: liveins: $vgpr0 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; CI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_load_private_v2s8_align2 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; GFX11-LABEL: name: test_load_private_v2s8_align2 ; GFX11: liveins: $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX11-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 5) %2:_(s16) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -756,36 +756,28 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV4]], [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR3]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -900,40 +892,32 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR3]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC9]](s16), [[TRUNC10]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -133,17 +133,12 @@ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -154,42 +149,42 @@ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST1]] ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1 ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C6]](s32) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C7]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C7]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -589,44 +589,39 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -822,41 +822,37 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR3]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: test_sext_inreg_v3s16_1 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -751,40 +751,32 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[TRUNC8]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC9]](s16), [[TRUNC10]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -432,33 +432,23 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR4]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -432,33 +432,23 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR4]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -589,44 +589,39 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -6593,11 +6593,11 @@ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: {{ $}} @@ -6606,11 +6606,11 @@ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: {{ $}} @@ -6619,11 +6619,11 @@ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: {{ $}} @@ -6632,11 +6632,11 @@ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<12 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -459,44 +459,39 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -437,33 +437,23 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR4]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -437,33 +437,23 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF2]](s16) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR4]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF2]](s16) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -443,44 +443,39 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll @@ -528,10 +528,7 @@ ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-PACKED-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: load_1d_v3f16_xyz: