Index: llvm/include/llvm/CodeGen/GlobalISel/Localizer.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -82,9 +82,7 @@ MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties() - .set(MachineFunctionProperties::Property::IsSSA) - .set(MachineFunctionProperties::Property::Legalized) - .set(MachineFunctionProperties::Property::RegBankSelected); + .set(MachineFunctionProperties::Property::IsSSA); } void getAnalysisUsage(AnalysisUsage &AU) const override; Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -624,7 +624,6 @@ void addPreLegalizeMachineIR() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; - void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; void addFastRegAlloc() override; void addOptimizedRegAlloc() override; @@ -904,6 +903,7 @@ void GCNPassConfig::addPreLegalizeMachineIR() { bool IsOptNone = getOptLevel() == CodeGenOpt::None; addPass(createAMDGPUPreLegalizeCombiner(IsOptNone)); + addPass(new Localizer()); } bool GCNPassConfig::addLegalizeMachineIR() { @@ -916,12 +916,6 @@ return false; } -void GCNPassConfig::addPreGlobalInstructionSelect() { - // FIXME: We should run this before legalizing globals, but for some reason - // this requires legalized and regbankselected. - addPass(new Localizer()); -} - bool GCNPassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -75,47 +75,39 @@ define amdgpu_kernel void @localize_globals(i1 %cond) { ; GFX9-LABEL: localize_globals: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX9-NEXT: s_getpc_b64 s[2:3] -; GFX9-NEXT: s_add_u32 s2, s2, gv2@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s3, s3, gv2@gotpcrel32@hi+4 -; GFX9-NEXT: s_getpc_b64 s[0:1] -; GFX9-NEXT: s_add_u32 s0, s0, gv3@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s1, s1, gv3@gotpcrel32@hi+4 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, gv0@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, gv0@gotpcrel32@hi+4 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, gv1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, gv1@gotpcrel32@hi+4 +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_and_b32 s4, s4, 1 -; GFX9-NEXT: s_cmp_lg_u32 s4, 0 +; GFX9-NEXT: s_and_b32 s0, s0, 1 +; GFX9-NEXT: s_cmp_lg_u32 s0, 0 ; GFX9-NEXT: s_cbranch_scc0 BB1_2 ; GFX9-NEXT: ; %bb.1: ; %bb0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: s_getpc_b64 s[0:1] +; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+4 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_getpc_b64 s[2:3] +; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+4 ; GFX9-NEXT: s_branch BB1_3 ; GFX9-NEXT: BB1_2: ; %bb1 -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-NEXT: s_getpc_b64 s[0:1] +; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s1, s1, gv2@gotpcrel32@hi+4 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_getpc_b64 s[2:3] +; GFX9-NEXT: s_add_u32 s2, s2, gv3@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s3, s3, gv3@gotpcrel32@hi+4 +; GFX9-NEXT: BB1_3: ; %bb2 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_mov_b32_e32 v5, 1 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v3, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: BB1_3: ; %bb2 -; GFX9-NEXT: global_store_dword v[0:1], v4, off -; GFX9-NEXT: global_store_dword v[2:3], v5, off +; GFX9-NEXT: global_store_dword v[0:1], v3, off ; GFX9-NEXT: s_endpgm entry: br i1 %cond, label %bb0, label %bb1 @@ -143,49 +135,49 @@ ; GFX9-LABEL: localize_internal_globals: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getpc_b64 s[10:11] -; GFX9-NEXT: s_add_u32 s10, s10, static.gv2@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s11, s11, static.gv2@rel32@hi+4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, static.gv3@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, static.gv3@rel32@hi+4 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, static.gv0@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, static.gv0@rel32@hi+4 -; GFX9-NEXT: s_xor_b64 s[12:13], vcc, s[12:13] -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, static.gv1@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, static.gv1@rel32@hi+4 -; GFX9-NEXT: s_and_saveexec_b64 s[14:15], s[12:13] -; GFX9-NEXT: s_xor_b64 s[12:13], exec, s[14:15] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; GFX9-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7] ; GFX9-NEXT: s_cbranch_execnz BB2_2 ; GFX9-NEXT: ; %bb.1: ; %bb1 -; GFX9-NEXT: v_mov_b32_e32 v0, s10 +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, static.gv2@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, static.gv2@rel32@hi+4 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, s11 +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, static.gv3@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, static.gv3@rel32@hi+4 ; GFX9-NEXT: global_store_dword v[0:1], v2, off -; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v2, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: BB2_2: ; %Flow -; GFX9-NEXT: s_or_saveexec_b64 s[8:9], s[12:13] -; GFX9-NEXT: s_xor_b64 exec, exec, s[8:9] +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_cbranch_execz BB2_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, static.gv0@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, static.gv0@rel32@hi+4 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, static.gv1@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, static.gv1@rel32@hi+4 ; GFX9-NEXT: global_store_dword v[0:1], v2, off -; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v2, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: BB2_4: ; %bb2 -; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] entry: