diff --git a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir --- a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir @@ -165,6 +165,37 @@ S_ENDPGM 0, implicit %0 ... +--- +name: test_remat_s_load_dword_immx16_subreg +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 + ; GCN-LABEL: name: test_remat_s_load_dword_immx16_subreg + ; GCN: liveins: $sgpr8_sgpr9, $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 0, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) + ; GCN-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sp_reg :: (store (s512) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr0_sgpr1, 128, 0 :: (dereferenceable invariant load (s512), align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s512) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1 + %0:sreg_64_xexec = COPY $sgpr8_sgpr9 + %1:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (invariant dereferenceable load (s512), align 4, addrspace 4) + %2:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 128, 0 :: (invariant dereferenceable load (s512), align 4, addrspace 4) + %2.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_512 = COPY %1.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_512 + S_NOP 0, implicit %2.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 + %3:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 128, 0 :: (invariant dereferenceable load (s512), align 4, addrspace 4) + %3.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7:sgpr_512 = COPY %1.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7:sgpr_512 + S_NOP 0, implicit %3.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7:sgpr_512 + + S_ENDPGM 0, implicit %0 +... + --- name: test_remat_s_get_waveid_in_workgroup tracksRegLiveness: true @@ -360,24 +391,20 @@ S_ENDPGM 0, implicit %0 ... -#--------------------------------------------------------------------- -# Negative tests -#--------------------------------------------------------------------- - --- -name: test_no_remat_s_load_dword_not_invariant +name: test_remat_s_load_dword_invariant_not_dereferenceable tracksRegLiveness: true body: | bb.0: liveins: $sgpr8_sgpr9 - ; GCN-LABEL: name: test_no_remat_s_load_dword_not_invariant + ; GCN-LABEL: name: test_remat_s_load_dword_invariant_not_dereferenceable ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (dereferenceable load (s32), addrspace 4) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (invariant load (s32), addrspace 4) ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (dereferenceable load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable load (s32), addrspace 4) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (invariant load (s32), addrspace 4) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (invariant load (s32), addrspace 4) ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 @@ -386,29 +413,34 @@ ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 - %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable load (s32), addrspace 4) - %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 4, 0 :: (dereferenceable load (s32), addrspace 4) - %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (dereferenceable load (s32), addrspace 4) + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (invariant load (s32), addrspace 4) + %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 4, 0 :: (invariant load (s32), addrspace 4) + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (invariant load (s32), addrspace 4) S_NOP 0, implicit %1 S_NOP 0, implicit %2 S_NOP 0, implicit %3 S_ENDPGM 0, implicit %0 ... + +#--------------------------------------------------------------------- +# Negative tests +#--------------------------------------------------------------------- + --- -name: test_no_remat_s_load_dword_not_dereferenceable +name: test_no_remat_s_load_dword_not_invariant tracksRegLiveness: true body: | bb.0: liveins: $sgpr8_sgpr9 - ; GCN-LABEL: name: test_no_remat_s_load_dword_not_dereferenceable + ; GCN-LABEL: name: test_no_remat_s_load_dword_not_invariant ; GCN: liveins: $sgpr8_sgpr9 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (invariant load (s32), addrspace 4) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 0, 0 :: (dereferenceable load (s32), addrspace 4) ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (invariant load (s32), addrspace 4) - ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (invariant load (s32), addrspace 4) + ; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (dereferenceable load (s32), addrspace 4) + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable load (s32), addrspace 4) ; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 @@ -417,9 +449,9 @@ ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3 %0:sreg_64_xexec = COPY $sgpr8_sgpr9 - %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (invariant load (s32), addrspace 4) - %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 4, 0 :: (invariant load (s32), addrspace 4) - %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (invariant load (s32), addrspace 4) + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable load (s32), addrspace 4) + %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 4, 0 :: (dereferenceable load (s32), addrspace 4) + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (dereferenceable load (s32), addrspace 4) S_NOP 0, implicit %1 S_NOP 0, implicit %2 S_NOP 0, implicit %3