Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/Passes.h" @@ -622,6 +623,7 @@ void addPreLegalizeMachineIR() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; + void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; void addFastRegAlloc() override; void addOptimizedRegAlloc() override; @@ -913,6 +915,12 @@ return false; } +void GCNPassConfig::addPreGlobalInstructionSelect() { + // FIXME: We should run this before legalizing globals, but for some reason + // this requires legalized and regbankselected. + addPass(new Localizer()); +} + bool GCNPassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -0,0 +1,206 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s + +; Test the localizer did something and we don't materialize all +; constants in SGPRs in the entry block. 
+ +define amdgpu_kernel void @localize_constants(i1 %cond) { +; GFX9-LABEL: localize_constants: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_and_b32 s0, s0, 1 +; GFX9-NEXT: s_cmp_lg_u32 s0, 0 +; GFX9-NEXT: s_cbranch_scc0 BB0_2 +; GFX9-NEXT: ; %bb.1: ; %bb0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; GFX9-NEXT: BB0_2: ; %bb1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_endpgm +entry: + br i1 %cond, label %bb0, label %bb1 + +bb0: + store volatile i32 123, i32 addrspace(1)* undef + store volatile i32 456, i32 addrspace(1)* undef + store volatile i32 999, i32 addrspace(1)* undef + store volatile i32 1000, i32 addrspace(1)* undef + store volatile i32 455, i32 addrspace(1)* undef + store volatile i32 23526, i32 addrspace(1)* undef + br label %bb2 + +bb1: + store volatile i32 23526, i32 addrspace(1)* undef + store volatile i32 455, i32 addrspace(1)* undef + store volatile i32 1000, i32 addrspace(1)* undef + store 
volatile i32 456, i32 addrspace(1)* undef + store volatile i32 999, i32 addrspace(1)* undef + store volatile i32 123, i32 addrspace(1)* undef + br label %bb2 + +bb2: + ret void +} + +; FIXME: These aren't localized because these were legalized before +; the localizer, and are no longer G_GLOBAL_VALUE. +@gv0 = addrspace(1) global i32 undef, align 4 +@gv1 = addrspace(1) global i32 undef, align 4 +@gv2 = addrspace(1) global i32 undef, align 4 +@gv3 = addrspace(1) global i32 undef, align 4 + +define amdgpu_kernel void @localize_globals(i1 %cond) { +; GFX9-LABEL: localize_globals: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 +; GFX9-NEXT: s_getpc_b64 s[2:3] +; GFX9-NEXT: s_add_u32 s2, s2, gv2@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s3, s3, gv2@gotpcrel32@hi+4 +; GFX9-NEXT: s_getpc_b64 s[0:1] +; GFX9-NEXT: s_add_u32 s0, s0, gv3@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s1, s1, gv3@gotpcrel32@hi+4 +; GFX9-NEXT: s_getpc_b64 s[8:9] +; GFX9-NEXT: s_add_u32 s8, s8, gv0@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s9, s9, gv0@gotpcrel32@hi+4 +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, gv1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, gv1@gotpcrel32@hi+4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_and_b32 s4, s4, 1 +; GFX9-NEXT: s_cmp_lg_u32 s4, 0 +; GFX9-NEXT: s_cbranch_scc0 BB1_2 +; GFX9-NEXT: ; %bb.1: ; %bb0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: s_branch BB1_3 +; GFX9-NEXT: BB1_2: ; %bb1 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 
v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NEXT: BB1_3: ; %bb2 +; GFX9-NEXT: global_store_dword v[0:1], v4, off +; GFX9-NEXT: global_store_dword v[2:3], v5, off +; GFX9-NEXT: s_endpgm +entry: + br i1 %cond, label %bb0, label %bb1 + +bb0: + store volatile i32 0, i32 addrspace(1)* @gv0 + store volatile i32 1, i32 addrspace(1)* @gv1 + br label %bb2 + +bb1: + store volatile i32 0, i32 addrspace(1)* @gv2 + store volatile i32 1, i32 addrspace(1)* @gv3 + br label %bb2 + +bb2: + ret void +} + +@static.gv0 = internal addrspace(1) global i32 undef, align 4 +@static.gv1 = internal addrspace(1) global i32 undef, align 4 +@static.gv2 = internal addrspace(1) global i32 undef, align 4 +@static.gv3 = internal addrspace(1) global i32 undef, align 4 + +define void @localize_internal_globals(i1 %cond) { +; GFX9-LABEL: localize_internal_globals: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_getpc_b64 s[10:11] +; GFX9-NEXT: s_add_u32 s10, s10, static.gv2@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s11, s11, static.gv2@rel32@hi+4 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-NEXT: s_getpc_b64 s[8:9] +; GFX9-NEXT: s_add_u32 s8, s8, static.gv3@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s9, s9, static.gv3@rel32@hi+4 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, 1 +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, static.gv0@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, static.gv0@rel32@hi+4 +; GFX9-NEXT: s_xor_b64 s[12:13], vcc, s[12:13] +; GFX9-NEXT: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, static.gv1@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, static.gv1@rel32@hi+4 +; GFX9-NEXT: s_and_saveexec_b64 s[14:15], s[12:13] +; GFX9-NEXT: s_xor_b64 s[12:13], exec, s[14:15] +; GFX9-NEXT: s_cbranch_execnz BB2_2 +; GFX9-NEXT: ; %bb.1: ; %bb1 +; GFX9-NEXT: v_mov_b32_e32 v0, s10 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: 
v_mov_b32_e32 v1, s11 +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v2, 1 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: BB2_2: ; %Flow +; GFX9-NEXT: s_or_saveexec_b64 s[8:9], s[12:13] +; GFX9-NEXT: s_xor_b64 exec, exec, s[8:9] +; GFX9-NEXT: s_cbranch_execz BB2_4 +; GFX9-NEXT: ; %bb.3: ; %bb0 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v2, 1 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: BB2_4: ; %bb2 +; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +entry: + br i1 %cond, label %bb0, label %bb1 + +bb0: + store volatile i32 0, i32 addrspace(1)* @static.gv0 + store volatile i32 1, i32 addrspace(1)* @static.gv1 + br label %bb2 + +bb1: + store volatile i32 0, i32 addrspace(1)* @static.gv2 + store volatile i32 1, i32 addrspace(1)* @static.gv3 + br label %bb2 + +bb2: + ret void +}