Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -1616,6 +1616,12 @@ /// The name of the SCoP (identical to the regions name) std::string name; + /// The ID to be assigned to the next Scop in a function + static int NextScopID; + + /// The name of the function currently under consideration + static std::string CurrentFunc; + // Access functions of the SCoP. // // This owns all the MemoryAccess objects of the Scop created in this pass. @@ -1808,6 +1814,12 @@ /// The smallest statement index not yet assigned. long StmtIdx = 0; + /// A number that uniquely represents a Scop within its function + const int ID; + + /// Return the ID for a new Scop within a function + static int getNextID(std::string ParentFunc); + /// Scop constructor; invoked from ScopBuilder::buildScop. Scop(Region &R, ScalarEvolution &SE, LoopInfo &LI, ScopDetection::DetectionContext &DC); @@ -2378,6 +2390,9 @@ /// Check if the SCoP is to be skipped by ScopPass passes. bool isToBeSkipped() const { return SkipScop; } + /// Return the ID of the Scop + int getID() const { return ID; } + /// Get the name of the entry and exit blocks of this Scop. /// /// These along with the function name can uniquely identify a Scop. Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -3499,6 +3499,18 @@ return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr; } +int Scop::NextScopID = 0; + +std::string Scop::CurrentFunc = ""; + +int Scop::getNextID(std::string ParentFunc) { + if (ParentFunc != CurrentFunc) { + CurrentFunc = ParentFunc; + NextScopID = 0; + } + return NextScopID++; +} + Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI, ScopDetection::DetectionContext &DC) : SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false), @@ -3506,7 +3518,8 @@ MaxLoopDepth(0), CopyStmtsNum(0), SkipScop(false), DC(DC), IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr), Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr), - Schedule(nullptr) { + Schedule(nullptr), + ID(getNextID((*R.getEntry()->getParent()).getName().str())) { if (IslOnErrorAbort) isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT); buildContext(); Index: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp +++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp @@ -686,8 +686,8 @@ }; std::string GPUNodeBuilder::getKernelFuncName(int Kernel_id) { - return "FUNC_" + S.getFunction().getName().str() + "_KERNEL_" + - std::to_string(Kernel_id); + return "FUNC_" + S.getFunction().getName().str() + "_SCOP_" + + std::to_string(S.getID()) + "_KERNEL_" + std::to_string(Kernel_id); } void GPUNodeBuilder::initializeAfterRTH() { Index: polly/trunk/test/GPGPU/cuda-annotations.ll =================================================================== --- polly/trunk/test/GPGPU/cuda-annotations.ll +++ polly/trunk/test/GPGPU/cuda-annotations.ll @@ -4,11 +4,11 @@ ; REQUIRES: pollyacc -; KERNEL: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 { +; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 { ; KERNEL: !nvvm.annotations = !{!0} -; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} +; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_SCOP_0_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1} target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll =================================================================== --- polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll +++ polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll @@ -54,7 +54,7 @@ ; CHECK-NEXT: %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3 ; CHECK-NEXT: %23 = bitcast i32* %polly_launch_0_param_size_1 to i8* ; CHECK-NEXT: store i8* %23, i8** %22 -; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([810 x i8], [810 x i8]* @FUNC_copy_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([19 x i8], [19 x i8]* @FUNC_copy_KERNEL_0_name, i32 0, i32 0)) +; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([852 x i8], [852 x i8]* @FUNC_copy_SCOP_0_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([26 x i8], [26 x i8]* @FUNC_copy_SCOP_0_KERNEL_0_name, i32 0, i32 0)) ; CHECK-NEXT: call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr) ; CHECK-NEXT: call void @polly_freeKernel(i8* %24) ; CHECK-NEXT: call void @polly_synchronizeDevice() Index: polly/trunk/test/GPGPU/host-control-flow.ll =================================================================== --- polly/trunk/test/GPGPU/host-control-flow.ll +++ polly/trunk/test/GPGPU/host-control-flow.ll @@ -42,7 +42,7 @@ ; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99 ; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit -; KERNEL-IR: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0) +; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0) ; KERNEL-IR-LABEL: entry: ; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64 Index: polly/trunk/test/GPGPU/invariant-load-hoisting.ll =================================================================== --- polly/trunk/test/GPGPU/invariant-load-hoisting.ll +++ polly/trunk/test/GPGPU/invariant-load-hoisting.ll @@ -21,7 +21,7 @@ ; HOST-IR: call void @polly_launchKernel(i8* %215, i32 %221, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr) ; HOST-IR-NEXT: call void @polly_freeKernel(i8* %215) ; -; KERNEL-IR: define ptx_kernel void @FUNC_f_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 { +; KERNEL-IR: define ptx_kernel void @FUNC_f_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 { ; ; Check that we generate correct GPU code in case of invariant load hoisting. ; Index: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll =================================================================== --- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll +++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll @@ -16,12 +16,12 @@ ; B[i] += 42; ; } -; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_0' -; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_0" +; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0' +; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0" ; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_0(i8 addrspace(1)* %MemRef_A) +; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 @@ -31,12 +31,12 @@ ; KERNEL: ret void ; KERNEL-NEXT: } -; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_1' -; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_1" +; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1' +; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1" ; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" ; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda" -; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_1(i8 addrspace(1)* %MemRef_B) +; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_B) ; KERNEL-NEXT: entry: ; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() ; KERNEL-NEXT: %b0 = zext i32 %0 to i64 Index: polly/trunk/test/GPGPU/kernel-params-scop-parameter.ll =================================================================== --- polly/trunk/test/GPGPU/kernel-params-scop-parameter.ll +++ polly/trunk/test/GPGPU/kernel-params-scop-parameter.ll @@ -9,7 +9,7 @@ ; A[i] += 42; ; } -; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) +; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/GPGPU/kernels-names-across-scops-funcs.ll =================================================================== --- polly/trunk/test/GPGPU/kernels-names-across-scops-funcs.ll +++ polly/trunk/test/GPGPU/kernels-names-across-scops-funcs.ll @@ -0,0 +1,124 @@ +; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen-ppcg \ +; RUN: -polly-acc-dump-kernel-ir -disable-output < %s | \ +; RUN: FileCheck -check-prefix=KERNEL %s + +; REQUIRES: pollyacc + +; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_arg1, i32 %arg) #0 { +; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_1_KERNEL_0(i8 addrspace(1)* %MemRef_arg1, i32 %arg) #0 { +; KERNEL: define ptx_kernel void @FUNC_foo2_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_arg1, i32 %arg) #0 { + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @foo(i32 %arg, i32* %arg1) #0 { +bb: + br label %bb2 + +bb2: ; preds = %bb + %tmp = icmp sgt i32 %arg, 0 + br i1 %tmp, label %bb3, label %bb13 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb4, %bb3 + %tmp5 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb4 ] + %tmp6 = getelementptr inbounds i32, i32* %arg1, i64 %tmp5 + %tmp7 = load i32, i32* %tmp6, align 4, !tbaa !2 + %tmp8 = add nsw i32 %tmp7, 1 + store i32 %tmp8, i32* %tmp6, align 4, !tbaa !2 + %tmp9 = add nuw nsw i64 %tmp5, 1 + %tmp10 = zext i32 %arg to i64 + %tmp11 = icmp ne i64 %tmp9, %tmp10 + br i1 %tmp11, label %bb4, label %bb12 + +bb12: ; preds = %bb4 + br label %bb13 + +bb13: ; preds = %bb12, %bb2 + %tmp14 = tail call i64 @clock() #3 + %tmp15 = icmp eq i64 %tmp14, 0 + br i1 %tmp15, label %bb16, label %bb29 + +bb16: ; preds = %bb13 + %tmp17 = icmp sgt i32 %arg, 0 + br i1 %tmp17, label %bb18, label %bb28 + +bb18: ; preds = %bb16 + br label %bb19 + +bb19: ; preds = %bb19, %bb18 + %tmp20 = phi i64 [ 0, %bb18 ], [ %tmp24, %bb19 ] + %tmp21 = getelementptr inbounds i32, i32* %arg1, i64 %tmp20 + %tmp22 = load i32, i32* %tmp21, align 4, !tbaa !2 + %tmp23 = add nsw i32 %tmp22, 1 + store i32 %tmp23, i32* %tmp21, align 4, !tbaa !2 + %tmp24 = add nuw nsw i64 %tmp20, 1 + %tmp25 = zext i32 %arg to i64 + %tmp26 = icmp ne i64 %tmp24, %tmp25 + br i1 %tmp26, label %bb19, label %bb27 + +bb27: ; preds = %bb19 + br label %bb28 + +bb28: ; preds = %bb27, %bb16 + br label %bb29 + +bb29: ; preds = %bb28, %bb13 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind +declare i64 @clock() #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind uwtable +define void @foo2(i32 %arg, i32* %arg1) #0 { +bb: + br label %bb2 + +bb2: ; preds = %bb + %tmp = icmp sgt i32 %arg, 0 + br i1 %tmp, label %bb3, label %bb13 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb4, %bb3 + %tmp5 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb4 ] + %tmp6 = getelementptr inbounds i32, i32* %arg1, i64 %tmp5 + %tmp7 = load i32, i32* %tmp6, align 4, !tbaa !2 + %tmp8 = add nsw i32 %tmp7, 1 + store i32 %tmp8, i32* %tmp6, align 4, !tbaa !2 + %tmp9 = add nuw nsw i64 %tmp5, 1 + %tmp10 = zext i32 %arg to i64 + %tmp11 = icmp ne i64 %tmp9, %tmp10 + br i1 %tmp11, label %bb4, label %bb12 + +bb12: ; preds = %bb4 + br label %bb13 + +bb13: ; preds = %bb12, %bb2 + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (http://llvm.org/git/clang 98cf823022d1d71065c71e9338226ebf8bfa36ba) (http://llvm.org/git/llvm.git 4efa61f12928015bad233274ffa2e60c918e9a10)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"}