Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1857,7 +1857,10 @@ RegOp.ChangeToImmediate(NonRegOp.getImm()); else if (NonRegOp.isFI()) RegOp.ChangeToFrameIndex(NonRegOp.getIndex()); - else + else if (NonRegOp.isGlobal()) { + RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(), + NonRegOp.getTargetFlags()); + } else return nullptr; // Make sure we don't reinterpret a subreg index in the target flags. Index: llvm/test/CodeGen/AMDGPU/lds-relocs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lds-relocs.ll +++ llvm/test/CodeGen/AMDGPU/lds-relocs.ll @@ -35,7 +35,7 @@ ; GCN: v_mov_b32_e32 v1, lds.external@abs32@lo ; encoding: [0xff,0x02,0x02,0x7e,A,A,A,A] ; GCN-NEXT: ; fixup A - offset: 4, value: lds.external@abs32@lo, kind: FK_Data_4{{$}} ; -; GCN: s_add_i32 s0, lds.defined@abs32@lo, s0 ; encoding: [0xff,0x00,0x00,0x81,A,A,A,A] +; GCN: s_add_i32 s0, s0, lds.defined@abs32@lo ; encoding: [0x00,0xff,0x00,0x81,A,A,A,A] ; GCN-NEXT: ; fixup A - offset: 4, value: lds.defined@abs32@lo, kind: FK_Data_4{{$}} ; ; GCN: .globl lds.external Index: llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir +++ llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir @@ -5,7 +5,11 @@ define void @commute_instruction_subreg_target_flag() { ret void } define void @commute_target_flag_frame_index() { ret void } define void @commute_target_flag_global() { ret void } + define void @commute_target_flag_global_offset() { ret void } + define void @commute_target_flag_global_offset_mismatch() { ret void } + declare void @func() + @gv = external addrspace(1) global i32 ... @@ -64,11 +68,49 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc - ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc - ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]] + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]] %0:sreg_64 = COPY $sgpr0_sgpr1 %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc S_ENDPGM 0, implicit %1, implicit %2 ... + +--- +name: commute_target_flag_global_offset +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: commute_target_flag_global_offset + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]] + %0:sreg_64 = COPY $sgpr0_sgpr1 + %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc + %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 4, %0.sub0, implicit-def dead $scc + S_ENDPGM 0, implicit %1, implicit %2 + +... + +--- +name: commute_target_flag_global_offset_mismatch +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: commute_target_flag_global_offset_mismatch + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc + ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, [[COPY]].sub0, implicit-def dead $scc + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]] + %0:sreg_64 = COPY $sgpr0_sgpr1 + %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc + %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, %0.sub0, implicit-def dead $scc + S_ENDPGM 0, implicit %1, implicit %2 + +...