Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -2359,6 +2359,9 @@
   /// Get the name of this Scop.
   std::string getNameStr() const;
 
+  /// Get the original name of this Scop.
+  std::string getOrigNameStr() const { return name; }
+
   /// Get the constraint on parameter of this Scop.
   ///
   /// @return The constraint on parameter of this Scop.
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -12,9 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "polly/CodeGen/PPCGCodeGeneration.h"
+#include <sstream>
+
 #include "polly/CodeGen/IslAst.h"
 #include "polly/CodeGen/IslNodeBuilder.h"
+#include "polly/CodeGen/PPCGCodeGeneration.h"
 #include "polly/CodeGen/Utils.h"
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
@@ -172,7 +174,8 @@
                  DominatorTree &DT, Scop &S, BasicBlock *StartBlock,
                  gpu_prog *Prog, GPURuntime Runtime, GPUArch Arch)
       : IslNodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock),
-        Prog(Prog), Runtime(Runtime), Arch(Arch) {
+        ScopNameForIR(retScopNameForIR()), Prog(Prog), Runtime(Runtime),
+        Arch(Arch) {
     getExprBuilder().setIDToSAI(&IDToSAI);
   }
 
@@ -194,6 +197,12 @@
   /// The maximal number of loops surrounding a parallel kernel.
   unsigned DeepestParallel = 0;
 
+  /// Return the name of the Scop which can be used as an identifier in LLVM IR.
+  std::string getScopNameForIR() const { return ScopNameForIR; }
+
+  /// Return the name to set for the ptx_kernel.
+  std::string getKernelFuncName(int Kernel_id);
+
 private:
   /// A vector of array base pointers for which a new ScopArrayInfo was created.
   ///
@@ -207,6 +216,9 @@
   /// The current GPU context.
   Value *GPUContext;
 
+  /// The name of the Scop which can be used as an identifier in LLVM IR.
+  const std::string ScopNameForIR;
+
   /// The set of isl_ids allocated in the kernel
   std::vector KernelIds;
 
@@ -237,6 +249,9 @@
 
   IslExprBuilder::IDToScopArrayInfoTy IDToSAI;
 
+  /// Build and return a string to set ScopNameForIR
+  std::string retScopNameForIR();
+
   /// Create code for user-defined AST nodes.
   ///
   /// These AST nodes can be of type:
@@ -557,6 +572,39 @@
                                     Value *Parameters);
 };
 
+/// Replace every occurrence of \p Find in \p Name with \p Replace, in place.
+///
+/// Static so that the generic name stays local to this translation unit.
+static void replace_char(std::string &Name, char Find, char Replace) {
+  for (char &C : Name)
+    if (C == Find)
+      C = Replace;
+}
+
+/// Build the Scop name that is embedded into the generated kernel names.
+///
+/// The original Scop name is read as three whitespace-separated tokens. The
+/// first and the third are kept, the middle one is discarded. Every '.' in the
+/// kept tokens becomes '$' and the two are joined with '_'.
+///
+/// NOTE(review): a name with fewer than three tokens leaves the missing parts
+/// empty; confirm that every Scop name has the "<entry> <sep> <exit>" shape.
+std::string GPUNodeBuilder::retScopNameForIR() {
+  std::string EntryName, ExitName, Separator;
+  std::stringstream NameStream(S.getOrigNameStr());
+  NameStream >> EntryName >> Separator >> ExitName;
+  replace_char(EntryName, '.', '$');
+  replace_char(ExitName, '.', '$');
+  return EntryName + "_" + ExitName;
+}
+
+/// Compose the kernel name from the name of the function containing the Scop,
+/// the Scop name and the kernel id: FUNC_<function>_SCOP_<scop>_KERNEL_<id>.
+std::string GPUNodeBuilder::getKernelFuncName(int Kernel_id) {
+  return "FUNC_" + S.getFunction().getName().str() + "_SCOP_" +
+         getScopNameForIR() + "_KERNEL_" + std::to_string(Kernel_id);
+}
+
 void GPUNodeBuilder::initializeAfterRTH() {
   BasicBlock *NewBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
@@ -1516,7 +1564,7 @@
   Builder.SetInsertPoint(&HostInsertPoint);
   Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues);
 
-  std::string Name = "kernel_" + std::to_string(Kernel->id);
+  std::string Name = getKernelFuncName(Kernel->id);
   Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
   Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
   Value *GPUKernel = createCallGetKernel(KernelString, NameString);
@@ -1557,7 +1605,7 @@
 GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
                                          SetVector &SubtreeValues) {
   std::vector Args;
-  std::string Identifier = "kernel_" + std::to_string(Kernel->id);
+  std::string Identifier = getKernelFuncName(Kernel->id);
 
   for (long i = 0; i < Prog->n_array; i++) {
     if (!ppcg_kernel_requires_array_argument(Kernel, i))
@@ -1821,7 +1869,7 @@
 void
GPUNodeBuilder::createKernelFunction( ppcg_kernel *Kernel, SetVector &SubtreeValues, SetVector &SubtreeFunctions) { - std::string Identifier = "kernel_" + std::to_string(Kernel->id); + std::string Identifier = getKernelFuncName(Kernel->id); GPUModule.reset(new Module(Identifier, Builder.getContext())); switch (Arch) { Index: test/GPGPU/cuda-annotations.ll =================================================================== --- test/GPGPU/cuda-annotations.ll +++ test/GPGPU/cuda-annotations.ll @@ -4,11 +4,11 @@ ; REQUIRES: pollyacc -; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 { +; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_bb1_bb8_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 { ; KERNEL: !nvvm.annotations = !{!0} -; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} +; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_SCOP_bb1_bb8_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/GPGPU/cuda-managed-memory-simple.ll =================================================================== --- test/GPGPU/cuda-managed-memory-simple.ll +++ test/GPGPU/cuda-managed-memory-simple.ll @@ -54,7 +54,7 @@ ; CHECK-NEXT: %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3 ; CHECK-NEXT: %23 = bitcast i32* %polly_launch_0_param_size_1 to i8* ; CHECK-NEXT: store i8* %23, i8** %22 -; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0)) +; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([942 x i8], [942 x i8]* @"FUNC_copy_SCOP_for$cond_for$end_KERNEL_0", i32 0, i32 0), i8* getelementptr inbounds ([41 x i8], [41 x i8]* @"FUNC_copy_SCOP_for$cond_for$end_KERNEL_0_name", i32 0, i32 0)) ; 
CHECK-NEXT: call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr) ; CHECK-NEXT: call void @polly_freeKernel(i8* %24) ; CHECK-NEXT: call void @polly_synchronizeDevice() Index: test/GPGPU/host-control-flow.ll =================================================================== --- test/GPGPU/host-control-flow.ll +++ test/GPGPU/host-control-flow.ll @@ -42,7 +42,7 @@ ; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99 ; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit -; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %c0) +; KERNEL-IR: define ptx_kernel void @"FUNC_foo_SCOP_for$cond_for$end20_KERNEL_0"(i8 addrspace(1)* %MemRef_A, i64 %c0) ; KERNEL-IR-LABEL: entry: ; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 Index: test/GPGPU/invariant-load-hoisting.ll =================================================================== --- test/GPGPU/invariant-load-hoisting.ll +++ test/GPGPU/invariant-load-hoisting.ll @@ -21,7 +21,7 @@ ; HOST-IR: call void @polly_launchKernel(i8* %215, i32 %221, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr) ; HOST-IR-NEXT: call void @polly_freeKernel(i8* %215) ; -; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 { +; KERNEL-IR: define ptx_kernel void @"FUNC_f_SCOP_entry$split_for$end26_KERNEL_0"(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 { ; ; Check that we generate correct GPU code in case of invariant load hoisting. 
; Index: test/GPGPU/kernel-params-only-some-arrays.ll =================================================================== --- test/GPGPU/kernel-params-only-some-arrays.ll +++ test/GPGPU/kernel-params-only-some-arrays.ll @@ -16,12 +16,12 @@ ; B[i] += 42; ; } -; KERNEL: ; ModuleID = 'kernel_0' -; KERNEL-NEXT: source_filename = "kernel_0" +; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_for$cond_for$end9_KERNEL_0' +; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_for$cond_for$end9_KERNEL_0" ; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A) +; KERNEL: define ptx_kernel void @"FUNC_kernel_params_only_some_arrays_SCOP_for$cond_for$end9_KERNEL_0"(i8 addrspace(1)* %MemRef_A) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 @@ -31,12 +31,12 @@ ; KERNEL: ret void ; KERNEL-NEXT: } -; KERNEL: ; ModuleID = 'kernel_1' -; KERNEL-NEXT: source_filename = "kernel_1" +; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_for$cond_for$end9_KERNEL_1' +; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_for$cond_for$end9_KERNEL_1" ; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @kernel_1(i8 addrspace(1)* %MemRef_B) +; KERNEL: define ptx_kernel void @"FUNC_kernel_params_only_some_arrays_SCOP_for$cond_for$end9_KERNEL_1"(i8 addrspace(1)* %MemRef_B) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 Index: 
test/GPGPU/kernel-params-scop-parameter.ll =================================================================== --- test/GPGPU/kernel-params-scop-parameter.ll +++ test/GPGPU/kernel-params-scop-parameter.ll @@ -9,7 +9,7 @@ ; A[i] += 42; ; } -; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) +; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_SCOP_bb1_bb8_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"