diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -423,6 +423,12 @@
   return BinaryOp_match(L, R);
 }
 
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false>
+m_GBuildVector(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false>(L, R);
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, false>
 m_GPtrAdd(const LHS &L, const RHS &R) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -45,6 +45,12 @@
     [{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]),
   (apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
 
+def trunc_buildvector_fold : GICombineRule<
+  (defs root:$op, register_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_TRUNC):$op,
+    [{ return PostLegalizerHelper.matchTruncBuildVectorFold(*${op}, ${matchinfo}); }]),
+  (apply [{ PostLegalizerHelper.applyTruncBuildVectorFold(*${op}, ${matchinfo}); }])>;
+
 def clamp_i64_to_i16_matchdata : GIDefMatchData<"AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo">;
 
 def clamp_i64_to_i16 : GICombineRule<
@@ -53,6 +59,12 @@
     [{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
   (apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
 
+def v2s16_extract_insert_vector_elt_fold : GICombineRule<
+  (defs root:$op, unsigned_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_INSERT_VECTOR_ELT, G_EXTRACT_VECTOR_ELT):$op,
+    [{ return PreLegalizerHelper.matchV2S16ExtractInsertVectorEltFold(*${op}, ${matchinfo}); }]),
+  (apply [{ PreLegalizerHelper.applyV2S16ExtractInsertVectorEltFold(*${op}, ${matchinfo}); }])>;
+
 def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">;
 
 def int_minmax_to_med3 : GICombineRule<
@@ -109,7 +121,8 @@
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPreLegalizerCombinerHelper",
-  [all_combines, clamp_i64_to_i16, foldable_fneg]> {
+  [all_combines, clamp_i64_to_i16, foldable_fneg,
+   v2s16_extract_insert_vector_elt_fold]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
   let AdditionalArguments = [];
@@ -119,7 +132,7 @@
   "AMDGPUGenPostLegalizerCombinerHelper",
   [all_combines, gfx6gfx7_combines, uchar_to_float,
    cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
-   rcp_sqrt_to_rsq]> {
+   rcp_sqrt_to_rsq, trunc_buildvector_fold]> {
   let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
   let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -74,6 +74,9 @@
                         const CvtF32UByteMatchInfo &MatchInfo);
 
   bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
+
+  bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo);
+  void applyTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo);
 };
 
 bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
@@ -303,6 +306,25 @@
   return TLI->isCanonicalized(Reg, MF);
 }
 
+bool AMDGPUPostLegalizerCombinerHelper::matchTruncBuildVectorFold(
+    MachineInstr &MI, Register &MatchInfo) {
+  assert(MI.getOpcode() == AMDGPU::G_TRUNC);
+
+  // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y))) with just x
+  // if type(x) == type(G_TRUNC).
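+  //
+  // For example (a sketch; the virtual register names are illustrative and
+  // not taken from any test):
+  //   %vec:_(<2 x s16>) = G_BUILD_VECTOR %x:_(s16), %y:_(s16)
+  //   %cast:_(s32) = G_BITCAST %vec(<2 x s16>)
+  //   %dst:_(s16) = G_TRUNC %cast(s32)
+  // The G_TRUNC keeps only the low 16 bits of the bitcast, and those bits
+  // are exactly %x, so %dst can simply be replaced by %x.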
+  if (!mi_match(MI.getOperand(1).getReg(), MRI,
+                m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
+    return false;
+
+  return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
+}
+
+void AMDGPUPostLegalizerCombinerHelper::applyTruncBuildVectorFold(
+    MachineInstr &MI, Register &MatchInfo) {
+  MRI.replaceRegWith(MI.getOperand(0).getReg(), MatchInfo);
+  MI.eraseFromParent();
+}
+
 class AMDGPUPostLegalizerCombinerHelperState {
 protected:
   AMDGPUCombinerHelper &Helper;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -55,6 +55,9 @@
   void applyClampI64ToI16(MachineInstr &MI,
                           const ClampI64ToI16MatchInfo &MatchInfo);
+
+  bool matchV2S16ExtractInsertVectorEltFold(MachineInstr &MI, unsigned &Idx);
+  void applyV2S16ExtractInsertVectorEltFold(MachineInstr &MI, unsigned &Idx);
 };
 
 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
@@ -154,6 +157,63 @@
   MI.eraseFromParent();
 }
 
+bool AMDGPUPreLegalizerCombinerHelper::matchV2S16ExtractInsertVectorEltFold(
+    MachineInstr &MI, unsigned &Idx) {
+  assert(MI.getOpcode() == AMDGPU::G_INSERT_VECTOR_ELT ||
+         MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT);
+
+  // Match v2s16 vectors with constant indices for both operations.
+  LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
+  if (VecTy != LLT::fixed_vector(2, 16))
+    return false;
+
+  const auto IdxOpPos = (MI.getOpcode() == AMDGPU::G_INSERT_VECTOR_ELT) ? 3 : 2;
+  const auto IdxVal =
+      getIConstantVRegValWithLookThrough(MI.getOperand(IdxOpPos).getReg(), MRI);
+  if (!IdxVal)
+    return false;
+
+  Idx = IdxVal->Value.getZExtValue();
+  // Additionally, check that the index is in bounds.
+  return Idx == 0 || Idx == 1;
+}
+
+void AMDGPUPreLegalizerCombinerHelper::applyV2S16ExtractInsertVectorEltFold(
+    MachineInstr &MI, unsigned &Idx) {
+  // Fold v2s16 G_(EXTRACT|INSERT)_VECTOR_ELT with constant indices into
+  // G_UNMERGE_VALUES (+ G_BUILD_VECTOR for INSERT).
+  // This is done because v2s16 G_(EXTRACT|INSERT)_VECTOR_ELT aren't legal,
+  // but we don't want them to become bitwise ops, and we prefer this form
+  // because it makes selection of instructions such as mad_mix easier.
+  // Moreover, the source operation (that generates the v2s16 value) often
+  // gets scalarized, so the G_UNMERGE_VALUES will disappear.
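+  //
+  // For example, an extract of element 1 (again with illustrative register
+  // names) such as
+  //   %elt:_(s16) = G_EXTRACT_VECTOR_ELT %vec:_(<2 x s16>), %one:_(s32)
+  // becomes
+  //   %lo:_(s16), %hi:_(s16) = G_UNMERGE_VALUES %vec(<2 x s16>)
+  // with %elt replaced by %hi. An insert of %new at index 0 instead becomes
+  //   %res:_(<2 x s16>) = G_BUILD_VECTOR %new(s16), %hi(s16)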
+  assert(Idx == 0 || Idx == 1);
+
+  B.setInstrAndDebugLoc(MI);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register VecSrc = MI.getOperand(1).getReg();
+  LLT EltTy = MRI.getType(VecSrc).getElementType();
+  assert(EltTy == LLT::scalar(16));
+
+  const unsigned OtherEltIdx = (Idx == 0) ? 1 : 0;
+  if (MI.getOpcode() == AMDGPU::G_INSERT_VECTOR_ELT) {
+    Register EltSrc = MI.getOperand(2).getReg();
+    Register OtherElt = B.buildUnmerge(EltTy, VecSrc).getReg(OtherEltIdx);
+    if (Idx == 0)
+      B.buildBuildVector(Dst, {EltSrc, OtherElt});
+    else
+      B.buildBuildVector(Dst, {OtherElt, EltSrc});
+  } else {
+    assert(MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT);
+    Register Elt = B.buildUnmerge(EltTy, VecSrc).getReg(Idx);
+
+    Helper.replaceRegWith(MRI, Dst, Elt);
+  }
+
+  MI.eraseFromParent();
+}
+
 class AMDGPUPreLegalizerCombinerHelperState {
 protected:
   AMDGPUCombinerHelper &Helper;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
@@ -18,10 +18,8 @@
   ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
   ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
   ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
   ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
   ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -43,10 +41,8 @@
   ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
   ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
   ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
   ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
   ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -75,14 +71,12 @@
   ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
   ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+  ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
   ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
   ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
   ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
   ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
@@ -105,14 +99,12 @@
   ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
   ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+  ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
   ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
-  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+  ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+  ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
   ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
   ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
   ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
@@ -144,17 +136,14 @@
   ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
   ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
-  ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR
G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -105,14 +99,12 @@ ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -144,17 +136,14 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -181,17 +170,14 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -227,17 +213,14 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -264,17 +247,14 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -309,14 +289,12 @@ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -339,14 +317,12 @@ ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -378,17 +354,14 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) 
= G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -415,17 +388,14 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -461,17 +431,14 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -498,17 +465,14 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -544,20 +508,17 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = 
G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -583,20 +544,17 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -631,14 +589,12 @@ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -661,14 +617,12 @@ ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -700,17 +654,14 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; 
GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -737,17 +688,14 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -783,20 +731,17 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; 
GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -822,20 +767,17 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -871,20 +813,17 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -910,20 +849,17 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -959,17 +895,14 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) @@ -996,17 +929,14 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1042,20 +972,17 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
@@ -1081,20 +1008,17 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
@@ -1134,10 +1058,8 @@
 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX9-NEXT: S_ENDPGM 0
 ; GFX10NSA-LABEL: name: store_1d
@@ -1159,10 +1081,8 @@
 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX10NSA-NEXT: S_ENDPGM 0
 main_body:
@@ -1191,14 +1111,12 @@
 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX9-NEXT: S_ENDPGM 0
@@ -1221,14 +1139,12 @@
 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX10NSA-NEXT: S_ENDPGM 0
@@ -1260,17 +1176,14 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1297,17 +1210,14 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1343,17 +1253,14 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1380,17 +1287,14 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1425,14 +1329,12 @@
 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX9-NEXT: S_ENDPGM 0
@@ -1455,14 +1357,12 @@
 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX10NSA-NEXT: S_ENDPGM 0
@@ -1494,17 +1394,14 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1531,17 +1428,14 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1577,17 +1471,14 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1614,17 +1505,14 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1660,20 +1548,17 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -1699,20 +1584,17 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -1747,14 +1629,12 @@
 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX9-NEXT: S_ENDPGM 0
@@ -1777,14 +1657,12 @@
 ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
 ; GFX10NSA-NEXT: S_ENDPGM 0
@@ -1816,17 +1694,14 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1853,17 +1728,14 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -1899,20 +1771,17 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -1938,20 +1807,17 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -1987,20 +1853,17 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -2026,20 +1889,17 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -2075,17 +1935,14 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -2112,17 +1969,14 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
@@ -2158,20 +2012,17 @@
 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -2197,20 +2048,17 @@
 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
 ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX10NSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
@@ -2240,10 +2088,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2265,10 +2111,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2297,10 +2141,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2322,10 +2164,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2354,10 +2194,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2379,10 +2217,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2411,10 +2247,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2436,10 +2270,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2468,10 +2300,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2493,10 +2323,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2525,10 +2353,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2550,10 +2376,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2582,10 +2406,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2607,10 +2429,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2639,10 +2459,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2664,10 +2482,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2696,10 +2512,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
 ; GFX9-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -2717,10 +2531,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource")
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
 ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -2745,10 +2557,8 @@
 ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
 ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2768,10 +2578,8 @@
 ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
 ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource")
 ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
 ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
@@ -2799,10 +2607,8 @@
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT:
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V1 @@ -2820,10 +2626,8 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: @@ -2850,10 +2654,8 @@ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V2 @@ -2873,10 +2675,8 @@ ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX10NSA-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: @@ -2900,10 +2700,8 @@ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; 
GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -2925,10 +2723,8 @@ ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -2957,10 +2753,8 @@ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -2982,10 +2776,8 @@ ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3014,10 +2806,8 @@ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3039,10 +2829,8 @@ ; GFX10NSA-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -3076,10 +2864,8 @@ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc @@ -3101,10 +2887,8 @@ ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: @@ -3133,10 +2917,8 @@ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_slc @@ -3158,10 +2940,8 @@ ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: @@ -3190,10 +2970,8 @@ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9-NEXT: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc_slc @@ -3215,10 +2993,8 @@ ; GFX10NSA-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10NSA-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA-NEXT: S_ENDPGM 0 main_body: @@ -3272,10 +3048,8 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -3299,10 +3073,8 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -3336,14 +3108,12 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) @@ -3368,14 +3138,12 @@ ; GFX10NSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) @@ -3412,17 +3180,14 @@ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) @@ -3451,17 +3216,14 @@ ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF1]](s16) @@ -3502,20 +3264,17 @@ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) @@ -3543,20 +3302,17 @@ ; GFX10NSA-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10NSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10NSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10NSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10NSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10NSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10NSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
 ; GFX10NSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
 ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-trunc-bitcast-buildvector.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: s16_trunc_v2s16_buildvector
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s16_trunc_v2s16_buildvector
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C]], [[TRUNC]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
+ %5:_(s32) = G_BITCAST %4
+ %6:_(s16) = G_TRUNC %5
+ %7:_(s16) = G_CONSTANT i16 42
+ %8:_(s16) = G_OR %7, %6
+ %9:_(s32) = G_ZEXT %8
+ $vgpr0 = COPY %9
+...
+
+---
+name: s16_trunc_v2s32_buildvector_nofold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s16_trunc_v2s32_buildvector_nofold
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C]], [[TRUNC]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
+ %3:_(s64) = G_BITCAST %2
+ %4:_(s16) = G_TRUNC %3
+ %5:_(s16) = G_CONSTANT i16 42
+ %6:_(s16) = G_OR %5, %4
+ %7:_(s32) = G_ZEXT %6
+ $vgpr0 = COPY %7
+...
+
+---
+name: s32_trunc_v2s32_buildvector
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s32_trunc_v2s32_buildvector
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
+ %3:_(s64) = G_BITCAST %2
+ %4:_(s32) = G_TRUNC %3
+ $vgpr0 = COPY %4
+...
+
+---
+name: s32_trunc_v2s32_buildvector_multiple_users
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: s32_trunc_v2s32_buildvector_multiple_users
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[EVEC]](s32)
+ ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BITCAST]](s64)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
+ %3:_(s64) = G_BITCAST %2
+ %4:_(s32) = G_TRUNC %3
+ %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1
+ $vgpr0 = COPY %4
+ $vgpr1 = COPY %5
+ $vgpr2_vgpr3 = COPY %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-v2s16-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-v2s16-extract-vector-elt.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-v2s16-extract-vector-elt.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: v2s16_extract_0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_extract_0
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C]], [[TRUNC]]
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+ %3:_(s16) = G_CONSTANT i16 42
+ %4:_(s16) = G_OR %3, %2
+ %5:_(s32) = G_ZEXT %4
+ $vgpr0 = COPY %5
+...
+
+---
+name: v2s16_extract_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_extract_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 1
+ %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+ %3:_(s32) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: v2s16_extract_outofbounds_nofold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_extract_outofbounds_nofold
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 2
+ %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+ %3:_(s32) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+...
+
+---
+name: v2s16_extract_noconstantindice_nofold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: v2s16_extract_noconstantindice_nofold
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[BITCAST]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = G_BITCAST %0
+ %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+ %3:_(s32) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-v2s16-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-v2s16-insert-vector-elt.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-v2s16-insert-vector-elt.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: v2s16_insert_0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: v2s16_insert_0
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[UV1]](s16)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %1
+ %3:_(s32) = G_CONSTANT i32 0
+ %4:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0, %2, %3
+ %5:_(s32) = G_BITCAST %4
+ $vgpr0 = COPY %5
+...
+
+---
+name: v2s16_insert_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: v2s16_insert_1
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC]](s16)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %1
+ %3:_(s32) = G_CONSTANT i32 1
+ %4:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0, %2, %3
+ %5:_(s32) = G_BITCAST %4
+ $vgpr0 = COPY %5
+...
+
+---
+name: v2s16_insert_outofbounds_nofold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: v2s16_insert_outofbounds_nofold
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %1
+ %3:_(s32) = G_CONSTANT i32 2
+ %4:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0, %2, %3
+ %5:_(s32) = G_BITCAST %4
+ $vgpr0 = COPY %5
+...
+
+---
+name: v2s16_insert_noconstantindice_nofold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: v2s16_insert_noconstantindice_nofold
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[COPY1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[IVEC]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %1
+ %4:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0, %2, %1
+ %5:_(s32) = G_BITCAST %4
+ $vgpr0 = COPY %5
+...