Index: llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp =================================================================== --- llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -53,6 +53,11 @@ cl::value_desc("0|1|2"), cl::init(0), cl::Hidden); +// Using weight increases compile time too much. +static cl::opt<bool> UseWeight("amdgpu-regbanks-reassign-use-weight", + cl::desc("Calculate stall cycle weights"), + cl::init(false), cl::Hidden); + #define DEBUG_TYPE "amdgpu-regbanks-reassign" #define NUM_VGPR_BANKS 4 @@ -438,6 +443,9 @@ unsigned Reg2, unsigned StallCycles) const { + if (!UseWeight) + return 0; + unsigned Defs = 0; MachineBasicBlock::const_instr_iterator Def(MI.getIterator()); MachineBasicBlock::const_instr_iterator B(MI.getParent()->instr_begin()); @@ -808,7 +816,8 @@ LLVM_DEBUG(dbgs() << "=== " << StallCycles << " stall cycles detected in " "function " << MF.getName() << '\n'); - Candidates.sort(); + if (UseWeight) + Candidates.sort(); LLVM_DEBUG(dbgs() << "\nCandidates:\n\n"; for (auto C : Candidates) C.dump(this); @@ -827,7 +836,8 @@ if (LocalCyclesSaved) { removeCandidates(C.Reg); computeStallCycles(C.Reg, AMDGPU::NoRegister, 0, -1, true); - Candidates.sort(); + if (UseWeight) + Candidates.sort(); LLVM_DEBUG(dbgs() << "\nCandidates:\n\n"; for (auto C : Candidates) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll @@ -21,12 +21,12 @@ ; GFX10-LABEL: sample_d_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_or_b32 v3, v2, v6, v3 -; GFX10-NEXT: v_and_or_b32 v10, v0, v6, v1 
-; GFX10-NEXT: image_sample_d_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_or_b32 v7, v0, v6, v1 +; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3 +; GFX10-NEXT: image_sample_d_g16 v[0:3], [v7, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -77,9 +77,9 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v2 -; GFX10-NEXT: v_and_or_b32 v3, v3, v7, v4 -; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_or_b32 v11, v1, v7, v2 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v11, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -107,12 +107,12 @@ ; GFX10-LABEL: sample_d_cl_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_or_b32 v3, v2, v7, v9 -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v1 -; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_or_b32 v3, v0, v7, v1 +; GFX10-NEXT: v_and_or_b32 v1, v2, v7, v9 +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v3, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -173,12 +173,12 @@ ; GFX10-LABEL: sample_cd_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_or_b32 v3, v2, v6, v3 -; GFX10-NEXT: v_and_or_b32 v10, v0, v6, v1 -; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_or_b32 v7, v0, v6, v1 +; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3 +; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v7, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -209,9 +209,9 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v2 -; GFX10-NEXT: v_and_or_b32 v3, v3, v7, v4 -; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_or_b32 v11, v1, v7, v2 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v11, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -239,12 +239,12 @@ ; GFX10-LABEL: sample_cd_cl_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_or_b32 v3, v2, v7, v9 -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v1 -; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_or_b32 v3, v0, v7, v1 +; GFX10-NEXT: v_and_or_b32 v1, v2, v7, v9 +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v3, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: Index: 
llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -2983,15 +2983,15 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v10, v4 ; GFX10-NEXT: v_mov_b32_e32 v11, v5 -; GFX10-NEXT: v_mov_b32_e32 v8, v6 -; GFX10-NEXT: v_mov_b32_e32 v9, v7 +; GFX10-NEXT: v_mov_b32_e32 v4, v6 +; GFX10-NEXT: v_mov_b32_e32 v5, v7 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v10 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v11, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, v8 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v9, vcc_lo +; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, v4 +; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[10:11] -; GFX10-NEXT: v_cmp_lt_u64_e64 s4, v[2:3], v[8:9] +; GFX10-NEXT: v_cmp_lt_u64_e64 s4, v[2:3], v[4:5] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, -1, s4 Index: llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -511,11 +511,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB2_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_add_rtn_u32 v0, v0, v7 +; GFX1064-NEXT: ds_add_rtn_u32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; 
GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -563,11 +563,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB2_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_add_rtn_u32 v0, v0, v7 +; GFX1032-NEXT: ds_add_rtn_u32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -750,11 +750,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB3_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_add_rtn_u32 v0, v0, v7 +; GFX1064-NEXT: ds_add_rtn_u32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -802,11 +802,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB3_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_add_rtn_u32 v0, v0, v7 +; GFX1032-NEXT: ds_add_rtn_u32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -989,11 +989,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB4_2 ; GFX1064-NEXT: ; %bb.1: -; 
GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_add_rtn_u32 v0, v0, v7 +; GFX1064-NEXT: ds_add_rtn_u32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -1041,11 +1041,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB4_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_add_rtn_u32 v0, v0, v7 +; GFX1032-NEXT: ds_add_rtn_u32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -2064,11 +2064,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB10_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_sub_rtn_u32 v0, v0, v7 +; GFX1064-NEXT: ds_sub_rtn_u32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -2116,11 +2116,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB10_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; 
GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_sub_rtn_u32 v0, v0, v7 +; GFX1032-NEXT: ds_sub_rtn_u32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -2799,11 +2799,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB14_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_and_rtn_b32 v0, v0, v7 +; GFX1064-NEXT: ds_and_rtn_b32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -2850,11 +2850,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB14_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_and_rtn_b32 v0, v0, v7 +; GFX1032-NEXT: ds_and_rtn_b32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -3037,11 +3037,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB15_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_or_rtn_b32 v0, v0, v7 +; GFX1064-NEXT: ds_or_rtn_b32 v0, v7, v4 ; GFX1064-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -3089,11 +3089,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB15_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_or_rtn_b32 v0, v0, v7 +; GFX1032-NEXT: ds_or_rtn_b32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -3276,11 +3276,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB16_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_xor_rtn_b32 v0, v0, v7 +; GFX1064-NEXT: ds_xor_rtn_b32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -3328,11 +3328,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB16_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_xor_rtn_b32 v0, v0, v7 +; GFX1032-NEXT: ds_xor_rtn_b32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -3512,11 +3512,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: 
s_cbranch_execz BB17_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_max_rtn_i32 v0, v0, v7 +; GFX1064-NEXT: ds_max_rtn_i32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -3563,11 +3563,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB17_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_max_rtn_i32 v0, v0, v7 +; GFX1032-NEXT: ds_max_rtn_i32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -3932,11 +3932,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB19_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_min_rtn_i32 v0, v0, v7 +; GFX1064-NEXT: ds_min_rtn_i32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -3983,11 +3983,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB19_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, 
local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_min_rtn_i32 v0, v0, v7 +; GFX1032-NEXT: ds_min_rtn_i32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -4355,11 +4355,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB21_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_max_rtn_u32 v0, v0, v7 +; GFX1064-NEXT: ds_max_rtn_u32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -4407,11 +4407,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB21_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_max_rtn_u32 v0, v0, v7 +; GFX1032-NEXT: ds_max_rtn_u32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv @@ -4773,11 +4773,11 @@ ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB23_2 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1064-NEXT: v_mov_b32_e32 v7, s3 +; GFX1064-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1064-NEXT: v_mov_b32_e32 v4, s3 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1064-NEXT: ds_min_rtn_u32 v0, 
v0, v7 +; GFX1064-NEXT: ds_min_rtn_u32 v0, v7, v4 ; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv ; GFX1064-NEXT: buffer_gl1_inv @@ -4824,11 +4824,11 @@ ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB23_2 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo -; GFX1032-NEXT: v_mov_b32_e32 v7, s3 +; GFX1032-NEXT: v_mov_b32_e32 v7, local_var32@abs32@lo +; GFX1032-NEXT: v_mov_b32_e32 v4, s3 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1032-NEXT: ds_min_rtn_u32 v0, v0, v7 +; GFX1032-NEXT: ds_min_rtn_u32 v0, v7, v4 ; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv ; GFX1032-NEXT: buffer_gl1_inv Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll @@ -676,13 +676,13 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v5, v10, v5 ; GFX10-NEXT: v_and_b32_e32 v3, v10, v3 ; GFX10-NEXT: v_and_b32_e32 v1, v10, v1 -; GFX10-NEXT: v_and_b32_e32 v5, v10, v5 +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: v_lshl_or_b32 v6, v6, 16, v5 -; GFX10-NEXT: image_sample_c_d v[0:3], [v0, v1, v3, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 +; GFX10-NEXT: image_sample_c_d v[0:3], [v0, v2, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -730,13 +730,13 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: 
v_and_b32_e32 v0, v7, v0 ; GFX10-NEXT: v_and_b32_e32 v4, v7, v4 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v4 -; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 -; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: v_lshl_or_b32 v5, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 +; GFX10-NEXT: image_sample_d_cl v[0:3], [v3, v5, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -787,12 +787,12 @@ ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: v_and_b32_e32 v5, v8, v5 -; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 -; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: v_lshl_or_b32 v6, v4, 16, v3 -; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v6, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 +; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v2, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -888,13 +888,13 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_and_b32_e32 v5, v10, v5 ; GFX10-NEXT: v_and_b32_e32 v3, v10, v3 ; GFX10-NEXT: v_and_b32_e32 v1, v10, v1 -; GFX10-NEXT: v_and_b32_e32 v5, v10, v5 +; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 -; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: v_lshl_or_b32 v6, v6, 16, v5 -; GFX10-NEXT: 
image_sample_c_cd v[0:3], [v0, v1, v3, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 +; GFX10-NEXT: image_sample_c_cd v[0:3], [v0, v2, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -942,13 +942,13 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; GFX10-NEXT: v_and_b32_e32 v4, v7, v4 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v4 -; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 -; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: v_lshl_or_b32 v5, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 +; GFX10-NEXT: image_sample_cd_cl v[0:3], [v3, v5, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -999,12 +999,12 @@ ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: v_and_b32_e32 v5, v8, v5 -; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 +; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 -; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 -; GFX10-NEXT: v_lshl_or_b32 v6, v4, 16, v3 -; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v6, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 +; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1 +; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v2, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -1203,13 
+1203,13 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 -; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; GFX10-NEXT: v_and_b32_e32 v6, v9, v6 -; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 +; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 +; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 -; GFX10-NEXT: v_lshl_or_b32 v7, v7, 16, v6 -; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v4, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: v_lshl_or_b32 v7, v5, 16, v4 +; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v7, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -1238,13 +1238,13 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 -; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; GFX10-NEXT: v_and_b32_e32 v6, v9, v6 -; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 +; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 +; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 +; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 -; GFX10-NEXT: v_lshl_or_b32 v7, v7, 16, v6 -; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v4, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: v_lshl_or_b32 v7, v5, 16, v4 +; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v7, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll +++ 
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll @@ -93,11 +93,11 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; encoding: [0xff,0x02,0x0e,0x7e,0xff,0xff,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36] ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 ; encoding: [0x07,0x05,0x04,0x36] -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04] -; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 ; encoding: [0x03,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04] -; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00,0x03,0x04,0x05,0x06] +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36] +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04] +; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04] +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v3, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0x8c,0xf0,0x03,0x00,0x40,0x00,0x02,0x04,0x05,0x06] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -209,11 +209,11 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; encoding: [0xff,0x02,0x0e,0x7e,0xff,0xff,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36] ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 ; encoding: [0x07,0x05,0x04,0x36] -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04] -; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 ; encoding: [0x03,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04] -; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; 
encoding: [0x0b,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00,0x03,0x04,0x05,0x06] +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36] +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04] +; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04] +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v3, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0xa4,0xf1,0x03,0x00,0x40,0x00,0x02,0x04,0x05,0x06] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] ; GFX10-NEXT: ; return to shader part epilog main_body: Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll @@ -93,11 +93,11 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 -; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 +; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v3, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: @@ -209,11 +209,11 @@ ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 -; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, 
v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 +; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 +; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v3, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog main_body: