Index: include/polly/CodeGen/IslNodeBuilder.h
===================================================================
--- include/polly/CodeGen/IslNodeBuilder.h
+++ include/polly/CodeGen/IslNodeBuilder.h
@@ -262,6 +262,14 @@
   /// @param NewValues A map that maps certain llvm::Values to new llvm::Values.
   void updateValues(ValueMapT &NewValues);
 
+  /// Update the values referenced in the subtree to newer values.
+  ///
+  /// When code generation continues in a subfunction/kernel function, each
+  /// value must be replaced by its newest version in ValueMap, if any.
+  ///
+  /// @param SubtreeValues The set of values to update in place.
+  void updateSubtreeValues(SetVector<Value *> &SubtreeValues);
+
   /// Generate code for a marker now.
   ///
   /// For mark nodes with an unknown name, we just forward the code generation
Index: lib/CodeGen/IslNodeBuilder.cpp
===================================================================
--- lib/CodeGen/IslNodeBuilder.cpp
+++ lib/CodeGen/IslNodeBuilder.cpp
@@ -340,6 +340,24 @@
   }
 }
 
+void IslNodeBuilder::updateSubtreeValues(SetVector<Value *> &SubtreeValues) {
+  // Rebuild the set in its original order. Iterating over a pointer-keyed map
+  // to apply the replacements would make the resulting order depend on
+  // addresses and thus vary between runs.
+  SetVector<Value *> UpdatedValues;
+
+  for (Value *V : SubtreeValues) {
+    Value *Latest = V;
+    // Follow the chain of replacements to its end. lookup() returns null for
+    // a missing key without inserting an entry, so ValueMap is left untouched.
+    while (Value *Next = ValueMap.lookup(Latest))
+      Latest = Next;
+    UpdatedValues.insert(Latest);
+  }
+
+  SubtreeValues = UpdatedValues;
+}
+
 void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
                                       std::vector<Value *> &IVS,
                                       __isl_take isl_id *IteratorID,
@@ -624,6 +642,9 @@
     SubtreeValues.insert(V);
   }
 
+  // Update the values to be passed to subfunction to newer values if any.
+  updateSubtreeValues(SubtreeValues);
+
   ValueMapT NewValues;
   ParallelLoopGenerator ParallelLoopGen(Builder, LI, DT, DL);
 
Index: test/CodeGen/OpenMP/nested_parallel_loops.ll
===================================================================
--- /dev/null
+++ test/CodeGen/OpenMP/nested_parallel_loops.ll
@@ -0,0 +1,55 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-vectorizer=polly -polly-parallel -polly-tile-sizes=11,32,32 -polly-codegen -S < %s | FileCheck %s
+;
+; CHECK-DAG: store [100 x float]* %polly.subfunc.arg.data, [100 x float]** %polly.subfn.storeaddr.polly.subfunc.arg.data
+; CHECK-DAG: store float* %polly.subfunc.arg.mean, float** %polly.subfn.storeaddr.polly.subfunc.arg.mean
+;
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i32 %m, i32 %n, [100 x float]* noalias %data, float* noalias %mean) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %cmp3 = icmp sgt i32 %m, 0
+  br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end10
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry.split
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.lr.ph, %for.inc8
+  %indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc8 ]
+  %cmp21 = icmp sgt i32 %n, 0
+  br i1 %cmp21, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx5 = getelementptr inbounds [100 x float], [100 x float]* %data, i64 %indvars.iv, i64 %indvars.iv5
+  %0 = load float, float* %arrayidx5, align 4
+  %arrayidx7 = getelementptr inbounds float, float* %mean, i64 %indvars.iv5
+  %1 = load float, float* %arrayidx7, align 4
+  %add = fadd float %0, %1
+  store float %add, float* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body3, label %for.cond1.for.inc8_crit_edge
+
+for.cond1.for.inc8_crit_edge:                     ; preds = %for.body3
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.cond1.for.inc8_crit_edge, %for.cond1.preheader
+  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+  %wide.trip.count7 = zext i32 %m to i64
+  %exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7
+  br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end10_crit_edge
+
+for.cond.for.end10_crit_edge:                     ; preds = %for.inc8
+  br label %for.end10
+
+for.end10:                                        ; preds = %for.cond.for.end10_crit_edge, %entry.split
+  ret void
+}