Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -2114,6 +2114,8 @@ auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); PPCGScop->options = createPPCGOptions(); + // enable live range reordering + PPCGScop->options->live_range_reordering = 1; PPCGScop->start = 0; PPCGScop->end = 0; @@ -2129,10 +2131,16 @@ PPCGScop->tagged_must_writes = getTaggedMustWrites(); PPCGScop->must_writes = S->getMustWrites(); PPCGScop->live_out = nullptr; - PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); PPCGScop->tagger = nullptr; - PPCGScop->independence = nullptr; + /* + ps->independence = isl_union_map_empty(isl_set_get_space(ps->context)); + for (i = 0; i < scop->n_independence; ++i) + ps->independence = isl_union_map_union(ps->independence, + isl_union_map_copy(scop->independences[i]->filter)); + */ + PPCGScop->independence = + isl_union_map_empty(isl_set_get_space(PPCGScop->context)); PPCGScop->dep_flow = nullptr; PPCGScop->tagged_dep_flow = nullptr; PPCGScop->dep_false = nullptr; @@ -2140,9 +2148,82 @@ PPCGScop->dep_order = nullptr; PPCGScop->tagged_dep_order = nullptr; + + + // Collect phi nodes in scop. 
+ SetVector<isl_id *> KillMemIds; + for (ScopStmt &Stmt : *S) { + for (MemoryAccess *MemRef : Stmt) { + if (MemRef->getLatestKind() == MemoryKind::PHI) { + isl_map *AccessRel = MemRef->getLatestAccessRelation(); + isl_id *KillId = isl_map_get_tuple_id(AccessRel, isl_dim_out); + if (KillMemIds.count(KillId) == 0) { + KillMemIds.insert(KillId); + } else { + isl_id_free(KillId); + } + isl_map_free(AccessRel); + } + } + } + + + // Will be modified inside loop. + PPCGScop->tagged_must_kills = + isl_union_map_from_map(isl_map_universe(S->getParamSpace())); + + // Will be modified inside loop PPCGScop->schedule = S->getScheduleTree(); - PPCGScop->names = getNames(); + DEBUG_PRINT("Scop Schedule (Original)", PPCGScop->schedule, schedule); + + for (isl_id *killId : KillMemIds) { + + + // PPCGScop->tagged_must_kill adjustment + // ===================================== + isl_id *KillStmtId = isl_id_alloc(S->getIslCtx(), "SKill_phantom", NULL); + // [param] -> { S_2[] -> memref[] } + isl_map *TaggedMustKillStmtMap = isl_map_universe(S->getParamSpace()); + TaggedMustKillStmtMap = isl_map_set_tuple_id( + TaggedMustKillStmtMap, isl_dim_in, isl_id_copy(KillStmtId)); + TaggedMustKillStmtMap = isl_map_set_tuple_id( + TaggedMustKillStmtMap, isl_dim_out, isl_id_copy(killId)); + + isl_id *PhantomRefId = isl_id_alloc(S->getIslCtx(), "ref_phantom", NULL); + + // [param] -> { phantom_ref[] -> memref[] } + isl_map *TaggedMustKillRefMap = isl_map_universe(S->getParamSpace()); + TaggedMustKillRefMap = + isl_map_set_tuple_id(TaggedMustKillRefMap, isl_dim_in, PhantomRefId); + TaggedMustKillRefMap = isl_map_set_tuple_id( + TaggedMustKillRefMap, isl_dim_out, killId); + + // [param] -> { [Stmt[] -> phantom_ref[]] -> memref[] } + isl_map *TaggedMustKill = + isl_map_domain_product(TaggedMustKillStmtMap, TaggedMustKillRefMap); + PPCGScop->tagged_must_kills = isl_union_map_union( + PPCGScop->tagged_must_kills, isl_union_map_from_map(TaggedMustKill)); + + // Schedule adjustment + // =================== + + 
isl_space *KillStmtSpace = S->getParamSpace(); + KillStmtSpace = + isl_space_set_tuple_id(KillStmtSpace, isl_dim_set, KillStmtId); + isl_union_set *KillStmtDomain = + isl_union_set_from_set(isl_set_universe(KillStmtSpace)); + + isl_schedule *KillSchedule = isl_schedule_from_domain(KillStmtDomain); + DEBUG_PRINT("Kill schedule: ", KillSchedule, schedule); + + + PPCGScop->schedule = isl_schedule_sequence(PPCGScop->schedule, KillSchedule); + DEBUG_PRINT("Scop schedule after adding kill schedule: ", PPCGScop->schedule, + schedule); + } + + PPCGScop->names = getNames(); PPCGScop->pet = nullptr; compute_tagger(PPCGScop); @@ -2414,7 +2495,13 @@ PPCGProg->to_inner = getArrayIdentity(); PPCGProg->to_outer = getArrayIdentity(); PPCGProg->any_to_outer = nullptr; - PPCGProg->array_order = nullptr; + + // this needs to be set when live range reordering is enabled. + // NOTE: I am not sure if this is conservatively correct. I'm not sure + // what the semantics of this is. + // Quoting PPCG/gpu.h: "Order dependences on non-scalars." 
+ PPCGProg->array_order = + isl_union_map_empty(isl_set_get_space(PPCGScop->context)); PPCGProg->n_stmts = std::distance(S->begin(), S->end()); PPCGProg->stmts = getStatements(); PPCGProg->n_array = std::distance(S->array_begin(), S->array_end()); @@ -2425,6 +2512,19 @@ PPCGProg->may_persist = compute_may_persist(PPCGProg); + DEBUG_PRINT("PPCGProg->array_order finalized", PPCGProg->array_order, + union_map); + DEBUG_PRINT("PPCGProg->read finalized", PPCGProg->read, union_map); + DEBUG_PRINT("PPCGProg->may_write finalized", PPCGProg->may_write, + union_map); + DEBUG_PRINT("PPCGProg->must_write finalized", PPCGProg->must_write, + union_map); + DEBUG_PRINT("PPCGProg->tagged_must_kill finalized", + PPCGProg->tagged_must_kill, union_map); + DEBUG_PRINT("PPCGProg->to_inner finalized", PPCGProg->to_inner, union_map); + DEBUG_PRINT("PPCGProg->to_outer finalized", PPCGProg->to_outer, union_map); + DEBUG_PRINT("PPCGProg->any_to_outer finalized", PPCGProg->any_to_outer, + union_map); return PPCGProg; } Index: lib/External/ppcg/gpu.h =================================================================== --- lib/External/ppcg/gpu.h +++ lib/External/ppcg/gpu.h @@ -374,4 +374,5 @@ __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog); void collect_references(struct gpu_prog *prog, struct gpu_array_info *array); + #endif Index: lib/External/ppcg/gpu.c =================================================================== --- lib/External/ppcg/gpu.c +++ lib/External/ppcg/gpu.c @@ -4070,6 +4070,7 @@ sc = isl_schedule_constraints_set_context(sc, isl_set_copy(prog->scop->context)); if (prog->scop->options->live_range_reordering) { + printf("live range reordering enabled.\n"); sc = isl_schedule_constraints_set_conditional_validity(sc, isl_union_map_copy(prog->scop->tagged_dep_flow), isl_union_map_copy(prog->scop->tagged_dep_order)); @@ -4082,9 +4083,14 @@ coincidence = isl_union_map_copy(validity); coincidence = isl_union_map_subtract(coincidence, 
isl_union_map_copy(prog->scop->independence)); + + // DEBUG_PRINT("coincidence: ", coincidence, union_map); + // DEBUG_PRINT("** array order: ", prog->array_order, union_map); coincidence = isl_union_map_union(coincidence, isl_union_map_copy(prog->array_order)); + assert(coincidence != NULL && "coincidence should be correct."); } else { + assert(0 && "why is live range reordering disabled?"); dep_raw = isl_union_map_copy(prog->scop->dep_flow); dep = isl_union_map_copy(prog->scop->dep_false); dep = isl_union_map_union(dep, dep_raw); @@ -4094,7 +4100,10 @@ validity = dep; } sc = isl_schedule_constraints_set_validity(sc, validity); + // DEBUG_PRINT("sc: ", sc, schedule_constraints); + // DEBUG_PRINT("coincidence: ", coincidence, union_map); sc = isl_schedule_constraints_set_coincidence(sc, coincidence); + // DEBUG_PRINT("sc: ", sc, schedule_constraints); sc = isl_schedule_constraints_set_proximity(sc, proximity); if (prog->scop->options->debug->dump_schedule_constraints) @@ -4115,8 +4124,9 @@ isl_schedule *schedule; sc = construct_schedule_constraints(gen->prog); + // DEBUG_PRINT("sc: ", sc, schedule_constraints); schedule = isl_schedule_constraints_compute_schedule(sc); - + return schedule; } @@ -4277,10 +4287,13 @@ schedule = load_schedule(gen->ctx, gen->options->load_schedule_file); } else { + printf("reschedule?: (%d)\n", gen->options->reschedule); if (gen->options->reschedule) schedule = compute_schedule(gen); - else + else { + assert(0 && "does not enter here"); schedule = determine_properties_original_schedule(gen); + } if (gen->options->save_schedule_file) save_schedule(schedule, gen->options->save_schedule_file); @@ -5376,6 +5389,7 @@ if (!prog->stmts) return gpu_prog_free(prog); + printf("***** collect array info\n"); if (collect_array_info(prog) < 0) return gpu_prog_free(prog); prog->may_persist = compute_may_persist(prog); Index: lib/External/ppcg/ppcg.h =================================================================== --- lib/External/ppcg/ppcg.h +++ 
lib/External/ppcg/ppcg.h @@ -12,6 +12,12 @@ const char *ppcg_base_name(const char *filename); int ppcg_extract_base_name(char *name, const char *input); +#define DEBUG_PRINT(NAME, OBJ, TYPE) {fprintf(stderr, "@@@ %s:%d %s\n", __FILE__, __LINE__, #NAME); \ + if(OBJ == NULL) fprintf(stderr, "nullptr"); else isl_ ## TYPE ## _dump(OBJ); \ + fprintf(stderr, "\n---\n");}; + +#undef DEBUG_PRINT +#define DEBUG_PRINT(NAME, OBJ, TYPE) if(0) {}; /* Representation of the scop for use inside PPCG. * Index: lib/External/ppcg/ppcg.c =================================================================== --- lib/External/ppcg/ppcg.c +++ lib/External/ppcg/ppcg.c @@ -698,6 +698,21 @@ */ void compute_dependences(struct ppcg_scop *scop) { + + // compute_tagged_dep_flow_only + DEBUG_PRINT("scop->tagged_must_kills",scop->tagged_must_kills, union_map); + DEBUG_PRINT("scop->tagged_must_writes",scop->tagged_must_writes, union_map); + DEBUG_PRINT("scop->tagged_reads", scop->tagged_reads, union_map); + DEBUG_PRINT("scop->tagged_may_writes", scop->tagged_may_writes, union_map); + //remove_independences_from_tagged_flow + DEBUG_PRINT("scop->tagged_dep_flow", scop->tagged_dep_flow, union_map); + DEBUG_PRINT("scop->independence", scop->independence, union_map); + // compute_order_dependences + DEBUG_PRINT("scop->schedule", scop->schedule, schedule); + // forced dependences + DEBUG_PRINT("scop->live_out", scop->live_out, union_map); + DEBUG_PRINT("scop->live_in", scop->live_in, union_map); + isl_union_map *may_source; isl_union_access_info *access; isl_union_flow *flow; Index: test/GPGPU/privatization-simple.ll =================================================================== --- /dev/null +++ test/GPGPU/privatization-simple.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP +; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR + +; SCOP: Function: f +; SCOP-NEXT: Region: %for.body---%for.end +; SCOP-NEXT: Max 
Loop Depth: 1 + +; Check that kernel launch is generated in host IR. +; the declare would not be generated unless a call to a kernel exists. +; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*) + +; void f(int A[], int B[], int control, int C[]) { +; int x; +; #pragma scop +; for(int i = 0; i < 1000; i ++) { +; x = 0; +; if(control) x = C[i]; +; B[i] = x * A[i]; +; +; } +; #pragma endscop +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B, i32 %control, i32* %C) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %if.end + %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %if.end ] + %tobool = icmp eq i32 %control, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %tmp4 = load i32, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %for.body, %if.then + %x.0 = phi i32 [ %tmp4, %if.then ], [ 0, %for.body ] + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp8 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %tmp8, %x.0 + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %mul, i32* %arrayidx4, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %if.end + ret void +} Index: test/GPGPU/privatization.ll =================================================================== --- /dev/null +++ test/GPGPU/privatization.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP +; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR + +; SCOP: Function: checkPrivatization +; SCOP-NEXT: Region: %for.body---%for.end +; SCOP-NEXT: Max Loop Depth: 
1 + + +; Check that kernel launch is generated in host IR. +; the declare would not be generated unless a call to a kernel exists. +; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*) + +; +; +; void checkPrivatization(int A[], int B[], int C[], int control) { +; int x; +; #pragma scop +; for (int i = 0; i < 1000; i++) { +; x = 0; +; if (control) +; x += C[i]; +; +; B[i] = x * A[i]; +; } +; #pragma endscop +; } +; +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @checkPrivatization(i32* %A, i32* %B, i32* %C, i32 %control) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %if.end + %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %if.end ] + %tobool = icmp eq i32 %control, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %tmp4 = load i32, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %for.body, %if.then + %x.0 = phi i32 [ %tmp4, %if.then ], [ 0, %for.body ] + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp9 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %tmp9, %x.0 + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %mul, i32* %arrayidx4, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %if.end + ret void +} Index: unittests/ScopPassManager/PassManagerTest.cpp =================================================================== --- unittests/ScopPassManager/PassManagerTest.cpp +++ unittests/ScopPassManager/PassManagerTest.cpp @@ -1,9 +1,9 @@ -#include "llvm/IR/PassManager.h" #include "polly/CodeGen/IslAst.h" #include "polly/DependenceInfo.h" #include "polly/ScopPass.h" #include "llvm/Analysis/AliasAnalysis.h" 
#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "gtest/gtest.h"