Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1006,6 +1006,11 @@ addPass(createAMDGPULowerModuleLDSPass()); } + // AMDGPUAttributor infers the absence of llvm.amdgcn.lds.kernel.id calls, + // so it must run after those calls have been introduced. + if (TM.getOptLevel() > CodeGenOpt::None) + addPass(createAMDGPUAttributorPass()); + if (TM.getOptLevel() > CodeGenOpt::None) addPass(createInferAddressSpacesPass()); @@ -1062,9 +1067,6 @@ if (RemoveIncompatibleFunctions) addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM)); - if (TM->getOptLevel() > CodeGenOpt::None) - addPass(createAMDGPUAttributorPass()); - // FIXME: This pass adds 2 hacky attributes that can be replaced with an // analysis, and should be removed. addPass(createAMDGPUAnnotateKernelFeaturesPass()); Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -177,6 +177,9 @@ ; GCN-O1-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-NEXT: Lower OpenCL enqueued blocks ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O1-NEXT: AMDGPU Attributor +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Cycle Info Analysis ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Infer address spaces ; GCN-O1-NEXT: Expand Atomic instructions @@ -216,9 +219,6 @@ ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: TLS Variable Hoist ; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions -; GCN-O1-NEXT: AMDGPU Attributor -; GCN-O1-NEXT: FunctionPass Manager -; GCN-O1-NEXT: Cycle Info Analysis ; GCN-O1-NEXT: CallGraph Construction ; GCN-O1-NEXT: Call Graph SCC Pass Manager ; GCN-O1-NEXT: AMDGPU Annotate Kernel Features @@ -446,6 +446,9 @@ ; GCN-O1-OPTS-NEXT: Early propagate 
attributes from kernels to functions ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O1-OPTS-NEXT: AMDGPU Attributor +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Cycle Info Analysis ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Infer address spaces ; GCN-O1-OPTS-NEXT: Expand Atomic instructions @@ -493,9 +496,6 @@ ; GCN-O1-OPTS-NEXT: TLS Variable Hoist ; GCN-O1-OPTS-NEXT: Early CSE ; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions -; GCN-O1-OPTS-NEXT: AMDGPU Attributor -; GCN-O1-OPTS-NEXT: FunctionPass Manager -; GCN-O1-OPTS-NEXT: Cycle Info Analysis ; GCN-O1-OPTS-NEXT: CallGraph Construction ; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager ; GCN-O1-OPTS-NEXT: AMDGPU Annotate Kernel Features @@ -737,6 +737,9 @@ ; GCN-O2-NEXT: Early propagate attributes from kernels to functions ; GCN-O2-NEXT: Lower OpenCL enqueued blocks ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O2-NEXT: AMDGPU Attributor +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Cycle Info Analysis ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Infer address spaces ; GCN-O2-NEXT: Expand Atomic instructions @@ -792,9 +795,6 @@ ; GCN-O2-NEXT: TLS Variable Hoist ; GCN-O2-NEXT: Early CSE ; GCN-O2-NEXT: AMDGPU Remove Incompatible Functions -; GCN-O2-NEXT: AMDGPU Attributor -; GCN-O2-NEXT: FunctionPass Manager -; GCN-O2-NEXT: Cycle Info Analysis ; GCN-O2-NEXT: CallGraph Construction ; GCN-O2-NEXT: Call Graph SCC Pass Manager ; GCN-O2-NEXT: AMDGPU Annotate Kernel Features @@ -1039,6 +1039,9 @@ ; GCN-O3-NEXT: Early propagate attributes from kernels to functions ; GCN-O3-NEXT: Lower OpenCL enqueued blocks ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O3-NEXT: AMDGPU Attributor +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Cycle Info Analysis ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Infer address spaces ; 
GCN-O3-NEXT: Expand Atomic instructions @@ -1106,9 +1109,6 @@ ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering ; GCN-O3-NEXT: AMDGPU Remove Incompatible Functions -; GCN-O3-NEXT: AMDGPU Attributor -; GCN-O3-NEXT: FunctionPass Manager -; GCN-O3-NEXT: Cycle Info Analysis ; GCN-O3-NEXT: CallGraph Construction ; GCN-O3-NEXT: Call Graph SCC Pass Manager ; GCN-O3-NEXT: AMDGPU Annotate Kernel Features Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -43,9 +43,9 @@ ; GFX9-LABEL: test_simple_indirect_call: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x4 -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s6, s9 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 -; GFX9-NEXT: s_add_u32 s0, s0, s9 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 +; GFX9-NEXT: s_add_u32 s0, s0, s17 ; GFX9-NEXT: s_addc_u32 s1, s1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshr_b32 s4, s4, 16