diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -243,6 +243,9 @@
 void initializeGCNNSAReassignPass(PassRegistry &);
 extern char &GCNNSAReassignID;
 
+FunctionPass *createAMDGPUPromotePointerKernArgsToGlobalPass();
+void initializeAMDGPUPromotePointerKernArgsToGlobalPass(PassRegistry &);
+
 namespace AMDGPU {
 enum TargetIndex {
   TI_CONSTDATA_START,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromotePointerKernArgsToGlobal.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromotePointerKernArgsToGlobal.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromotePointerKernArgsToGlobal.cpp
@@ -0,0 +1,91 @@
+//===-- AMDGPUPromotePointerKernArgsToGlobal.cpp - Promote pointer args ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Promote generic (flat) pointer kernel arguments to global pointers.
+///
+/// For every flat pointer argument of a kernel, this pass inserts a cast to
+/// the global address space followed by a cast back to the flat address space
+/// at the top of the entry block, and redirects all users of the argument to
+/// the round-tripped pointer. The InferAddressSpaces pass scheduled right
+/// after this one can then rewrite those users to operate on global pointers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-promote-pointer-kernargs"
+
+namespace {
+
+/// Legacy function pass that rewrites flat pointer kernel arguments so that
+/// their uses go through a global -> flat address space cast.
+class AMDGPUPromotePointerKernArgsToGlobal : public FunctionPass {
+public:
+  static char ID;
+
+  AMDGPUPromotePointerKernArgsToGlobal() : FunctionPass(ID) {}
+
+  bool runOnFunction(Function &F) override;
+
+  /// Only address space casts are inserted, so the CFG is left untouched.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+};
+
+} // End anonymous namespace
+
+char AMDGPUPromotePointerKernArgsToGlobal::ID = 0;
+
+INITIALIZE_PASS(AMDGPUPromotePointerKernArgsToGlobal, DEBUG_TYPE,
+                "Promote pointer kernel arguments to global", false, false)
+
+/// Promote the flat pointer arguments of kernel \p F.
+///
+/// \returns true if any cast was inserted, false if \p F is not a kernel or
+/// has no flat pointer argument.
+bool AMDGPUPromotePointerKernArgsToGlobal::runOnFunction(Function &F) {
+  // Skip non-entry function.
+  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+    return false;
+
+  // All casts are emitted ahead of the first instruction of the entry block.
+  auto &Entry = F.getEntryBlock();
+  IRBuilder<> IRB(&Entry, Entry.begin());
+
+  bool Changed = false;
+  for (auto &Arg : F.args()) {
+    auto *PtrTy = dyn_cast<PointerType>(Arg.getType());
+    if (!PtrTy || PtrTy->getPointerAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
+      continue;
+
+    auto *GlobalPtr =
+        IRB.CreateAddrSpaceCast(&Arg,
+                                PointerType::get(PtrTy->getPointerElementType(),
+                                                 AMDGPUAS::GLOBAL_ADDRESS),
+                                Arg.getName());
+    auto *NewFlatPtr = IRB.CreateAddrSpaceCast(GlobalPtr, PtrTy, Arg.getName());
+    Arg.replaceAllUsesWith(NewFlatPtr);
+    // replaceAllUsesWith above also rewrote the operand of the flat -> global
+    // cast, so point it back at the original argument.
+    cast<Instruction>(GlobalPtr)->setOperand(0, &Arg);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createAMDGPUPromotePointerKernArgsToGlobalPass() {
+  return new AMDGPUPromotePointerKernArgsToGlobal();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -217,6 +217,7 @@
   initializeAMDGPULowerIntrinsicsPass(*PR);
   initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
+  initializeAMDGPUPromotePointerKernArgsToGlobalPass(*PR);
   initializeAMDGPUCodeGenPreparePass(*PR);
   initializeAMDGPUPropagateAttributesEarlyPass(*PR);
   initializeAMDGPUPropagateAttributesLatePass(*PR);
@@ -441,6 +442,9 @@
   Builder.addExtension(
     PassManagerBuilder::EP_CGSCCOptimizerLate,
     [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+      // Promote generic pointer kernel arguments to global ones.
+      PM.add(llvm::createAMDGPUPromotePointerKernArgsToGlobalPass());
+
       // Add infer address spaces pass to the opt pipeline after inlining
       // but before SROA to increase SROA opportunities.
       PM.add(createInferAddressSpacesPass());
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -58,6 +58,7 @@
   AMDGPUMCInstLower.cpp
   AMDGPUOpenCLEnqueuedBlockLowering.cpp
   AMDGPUPromoteAlloca.cpp
+  AMDGPUPromotePointerKernArgsToGlobal.cpp
   AMDGPUPropagateAttributes.cpp
   AMDGPURegisterBankInfo.cpp
   AMDGPURegisterInfo.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/promote-pointer-kernargs.ll b/llvm/test/CodeGen/AMDGPU/promote-pointer-kernargs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-pointer-kernargs.ll
@@ -0,0 +1,13 @@
+; RUN: opt -O1 -S -o - -mtriple=amdgcn %s | FileCheck %s
+
+; CHECK-LABEL: promote_pointer_kernargs
+; CHECK-NEXT: addrspacecast i32* %{{.*}} to i32 addrspace(1)*
+; CHECK-NEXT: addrspacecast i32* %{{.*}} to i32 addrspace(1)*
+; CHECK-NEXT: load i32, i32 addrspace(1)*
+; CHECK-NEXT: store i32 %{{.*}}, i32 addrspace(1)*
+; CHECK-NEXT: ret void
+define amdgpu_kernel void @promote_pointer_kernargs(i32* %out, i32* %in) {
+  %v = load i32, i32* %in
+  store i32 %v, i32* %out
+  ret void
+}