Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1860,6 +1860,9 @@ else return nullptr; + // Make sure we don't reinterpret a subreg index in the target flags. + RegOp.setTargetFlags(NonRegOp.getTargetFlags()); + NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug); NonRegOp.setSubReg(SubReg); Index: llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir @@ -0,0 +1,74 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=machine-cse -o - %s | FileCheck %s + +--- | + define void @commute_instruction_subreg_target_flag() { ret void } + define void @commute_target_flag_frame_index() { ret void } + define void @commute_target_flag_global() { ret void } + declare void @func() + +... + +# Make sure the subreg index is not reinterpreted as target flags when commuting a register and immediate. + +--- +name: commute_instruction_subreg_target_flag +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: commute_instruction_subreg_target_flag + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]].sub1, 64, 0, implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]] + %0:vreg_64 = COPY $vgpr0_vgpr1 + %1:vgpr_32 = V_ADD_U32_e64 %0.sub1, 64, 0, implicit $exec + %2:vgpr_32 = V_ADD_U32_e64 64, %0.sub1, 0, implicit $exec + S_ENDPGM 0, implicit %1, implicit %2 + +... + +# FIXME: Why doesn't this CSE? 
+--- +name: commute_target_flag_frame_index +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 4 } +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: commute_target_flag_frame_index + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, [[COPY]].sub0, 0, implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]] + %0:vreg_64 = COPY $vgpr0_vgpr1 + %1:vgpr_32 = V_ADD_U32_e64 %0.sub0, %stack.0, 0, implicit $exec + %2:vgpr_32 = V_ADD_U32_e64 %stack.0, %0.sub0, 0, implicit $exec + S_ENDPGM 0, implicit %1, implicit %2 + +... + +# FIXME: Handle commuting global variables +--- +name: commute_target_flag_global +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: commute_target_flag_global + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc + ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]] + %0:sreg_64 = COPY $sgpr0_sgpr1 + %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc + %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc + S_ENDPGM 0, implicit %1, implicit %2 + +...