Index: llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -172,6 +172,7 @@ default: break; case TargetOpcode::G_ADD: + case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -443,6 +443,7 @@ default: break; case TargetOpcode::G_ADD: + case TargetOpcode::G_PTR_ADD: return C1 + C2; case TargetOpcode::G_AND: return C1 & C2; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -1999,12 +1999,12 @@ ; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_cselect_b32 s7, s16, s15 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: s_mov_b64 s[0:1], 16 +; GFX9-NEXT: v_mov_b32_e32 v4, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_s_s: @@ -2163,19 +2163,19 @@ ; GFX9-NEXT: v_and_or_b32 v10, v1, s13, v0 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], s12, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v10, s[12:13] -; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v10, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v10, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v5, v10, s[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v10, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v10, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v10, s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, v10, s[10:11] ; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_mov_b32_e32 v10, 16 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v10, 0 +; GFX9-NEXT: v_mov_b32_e32 v11, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_v_v16i16_s_s: @@ -2335,9 +2335,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: global_store_dwordx4 v0, v[4:7], s[0:1] +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_v_s: @@ -2509,9 +2510,9 @@ ; GFX9-NEXT: v_mov_b32_e32 v6, s22 ; GFX9-NEXT: v_mov_b32_e32 v7, s23 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[12:13] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[14:15] ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] @@ -2520,9 +2521,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: global_store_dwordx4 v0, v[4:7], s[0:1] +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_s_v: @@ -2697,9 +2699,9 @@ ; GFX9-NEXT: v_mov_b32_e32 v6, s18 ; GFX9-NEXT: v_mov_b32_e32 v7, s19 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[12:13] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] @@ -2708,9 +2710,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: global_store_dwordx4 v0, v[4:7], s[0:1] +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_s_v16i16_v_v: @@ -2871,20 +2874,20 @@ ; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v9, s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v10, s[10:11] ; GFX9-NEXT: v_and_or_b32 v11, v11, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v5, v11, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v11, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v5, v11, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v11, s[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v11, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v11, s[8:9] ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc -; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v11, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[10:11] -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v10, 0 +; GFX9-NEXT: v_mov_b32_e32 v10, 16 +; GFX9-NEXT: v_mov_b32_e32 v9, 0 +; GFX9-NEXT: v_mov_b32_e32 v11, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_v_v16i16_s_v: @@ -3019,20 +3022,20 @@ ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v10, s[10:11] ; GFX9-NEXT: v_and_or_b32 v11, v1, s13, v0 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], s12, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v5, v11, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v11, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v5, v11, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v6, v11, s[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v11, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e64 v6, v9, v11, s[8:9] ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc -; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v11, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[10:11] -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v10, 0 +; GFX9-NEXT: v_mov_b32_e32 v10, 16 +; GFX9-NEXT: v_mov_b32_e32 v9, 0 +; GFX9-NEXT: v_mov_b32_e32 v11, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_v_v16i16_v_s: @@ -3174,13 +3177,13 @@ ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v12, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v7, v12, s[2:3] -; GFX9-NEXT: v_mov_b32_e32 v9, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v12, s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v7, v11, v12, s[10:11] -; GFX9-NEXT: s_mov_b64 s[0:1], 16 -; GFX9-NEXT: v_mov_b32_e32 v10, 0 +; GFX9-NEXT: v_mov_b32_e32 v10, 16 +; GFX9-NEXT: v_mov_b32_e32 v9, 0 +; GFX9-NEXT: v_mov_b32_e32 v11, 0 ; GFX9-NEXT: global_store_dwordx4 v[8:9], v[0:3], off -; GFX9-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX9-NEXT: global_store_dwordx4 v[10:11], v[4:7], off ; GFX9-NEXT: s_endpgm ; ; GFX8-LABEL: insertelement_v_v16i16_v_v: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1866,11 +1866,11 @@ ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5) ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null`, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) + ; CHECK: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 4 + ; CHECK: G_STORE [[LOAD1]](s32), [[C4]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[C6:%[0-9]+]]:_(p1) = G_CONSTANT i64 8 + ; CHECK: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) ; CHECK: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY4]]