Index: polly/trunk/include/polly/CodeGen/CodeGeneration.h
===================================================================
--- polly/trunk/include/polly/CodeGen/CodeGeneration.h
+++ polly/trunk/include/polly/CodeGen/CodeGeneration.h
@@ -12,6 +12,7 @@
 #ifndef POLLY_CODEGENERATION_H
 #define POLLY_CODEGENERATION_H
 
+#include "IRBuilder.h"
 #include "polly/Config/config.h"
 #include "polly/ScopPass.h"
 #include "isl/map.h"
@@ -25,6 +26,12 @@
 };
 extern VectorizerChoice PollyVectorizerChoice;
 
+/// Mark a basic block unreachable.
+///
+/// Marks the basic block @p Block unreachable by equipping it with an
+/// UnreachableInst.
+void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder);
+
 struct CodeGenerationPass : public PassInfoMixin<CodeGenerationPass> {
   PreservedAnalyses run(Scop &S, ScopAnalysisManager &SAM,
                         ScopStandardAnalysisResults &AR, SPMUpdater &U);
Index: polly/trunk/lib/CodeGen/CodeGeneration.cpp
===================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp
@@ -54,6 +54,20 @@
                    cl::desc("Add run-time performance monitoring"), cl::Hidden,
                    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
 
+namespace polly {
+/// Mark a basic block unreachable.
+///
+/// Marks the basic block @p Block unreachable by equipping it with an
+/// UnreachableInst.
+void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
+  auto *OrigTerminator = Block.getTerminator();
+  Builder.SetInsertPoint(OrigTerminator);
+  Builder.CreateUnreachable();
+  OrigTerminator->eraseFromParent();
+}
+
+} // namespace polly
+
 namespace {
 
 static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) {
@@ -86,17 +100,6 @@
   }
 }
 
-/// Mark a basic block unreachable.
-///
-/// Marks the basic block @p Block unreachable by equipping it with an
-/// UnreachableInst.
-static void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
-  auto *OrigTerminator = Block.getTerminator();
-  Builder.SetInsertPoint(OrigTerminator);
-  Builder.CreateUnreachable();
-  OrigTerminator->eraseFromParent();
-}
-
 /// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from
 /// @R.
 ///
Index: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "polly/CodeGen/PPCGCodeGeneration.h"
+#include "polly/CodeGen/CodeGeneration.h"
 #include "polly/CodeGen/IslAst.h"
 #include "polly/CodeGen/IslNodeBuilder.h"
 #include "polly/CodeGen/Utils.h"
@@ -3373,7 +3374,6 @@
   // TODO: Handle LICM
   auto SplitBlock = StartBlock->getSinglePredecessor();
   Builder.SetInsertPoint(SplitBlock->getTerminator());
-  NodeBuilder.addParameters(S->getContext().release());
 
   isl_ast_build *Build = isl_ast_build_alloc(S->getIslCtx());
   isl_ast_expr *Condition = IslAst::buildRunCondition(*S, Build);
@@ -3383,17 +3383,34 @@
 
   // preload invariant loads. Note: This should happen before the RTC
   // because the RTC may depend on values that are invariant load hoisted.
-  if (!NodeBuilder.preloadInvariantLoads())
-    report_fatal_error("preloading invariant loads failed in function: " +
-                       S->getFunction().getName() +
-                       " | Scop Region: " + S->getNameStr());
+  if (!NodeBuilder.preloadInvariantLoads()) {
+    DEBUG(dbgs() << "preloading invariant loads failed in function: " +
+                        S->getFunction().getName() +
+                        " | Scop Region: " + S->getNameStr());
+    // adjust the dominator tree accordingly.
+    auto *ExitingBlock = StartBlock->getUniqueSuccessor();
+    assert(ExitingBlock);
+    auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
+    assert(MergeBlock);
+    polly::markBlockUnreachable(*StartBlock, Builder);
+    polly::markBlockUnreachable(*ExitingBlock, Builder);
+    auto *ExitingBB = S->getExitingBlock();
+    assert(ExitingBB);
+
+    DT->changeImmediateDominator(MergeBlock, ExitingBB);
+    DT->eraseNode(ExitingBlock);
+    isl_ast_expr_free(Condition);
+    isl_ast_node_free(Root);
+  } else {
 
-  Value *RTC = NodeBuilder.createRTC(Condition);
-  Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
+    NodeBuilder.addParameters(S->getContext().release());
+    Value *RTC = NodeBuilder.createRTC(Condition);
+    Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
 
-  Builder.SetInsertPoint(&*StartBlock->begin());
+    Builder.SetInsertPoint(&*StartBlock->begin());
 
-  NodeBuilder.create(Root);
+    NodeBuilder.create(Root);
+  }
 
   /// In case a sequential kernel has more surrounding loops as any parallel
   /// kernel, the SCoP is probably mostly sequential. Hence, there is no
Index: polly/trunk/test/GPGPU/failing-invariant-load-handling.ll
===================================================================
--- polly/trunk/test/GPGPU/failing-invariant-load-handling.ll
+++ polly/trunk/test/GPGPU/failing-invariant-load-handling.ll
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly < %s -analyze -polly-scops -polly-process-unprofitable -polly-invariant-load-hoisting | FileCheck %s -check-prefix=SCOPS
+; RUN: opt %loadPolly -S < %s -polly-codegen-ppcg -polly-process-unprofitable -polly-invariant-load-hoisting | FileCheck %s -check-prefix=CODEGEN
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+
+%S = type { i32, i32, [12 x %L] }
+%L = type { i32, i32, double, i32, i32, i32, i32, i32 }
+
+define void @test(%S* %cpi, i1 %b) {
+; SCOPS-LABEL: Region: %if.then14---%exit
+; SCOPS: Invariant Accesses: {
+; SCOPS-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; SCOPS-NEXT: [l2, l1] -> { Stmt_for_body_i[i0] -> MemRef_cpi[0, 0] };
+; SCOPS-NEXT: Execution Context: [l2, l1] -> { : }
+; SCOPS-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; SCOPS-NEXT: [l2, l1] -> { Stmt_for_body_lr_ph_i[] -> MemRef_cpi[0, 1] };
+; SCOPS-NEXT: Execution Context: [l2, l1] -> { : l2 > 0 }
+; SCOPS-NEXT: }
+; SCOPS: Arrays {
+; SCOPS-NEXT: i32 MemRef_cpi[*][(10 * %l1)]; // Element size 4
+; SCOPS-NEXT: }
+
+; Check that we gracefully handle failing invariant loads.
+; This test case is taken from:
+; test/Isl/CodeGen/invariant-load-dimension.ll
+
+; FIXME: Figure out how to actually generate code for this loop.
+; CODEGEN-NOT: LLVM ERROR: preloading invariant loads failed in function
+
+entry:
+  %nt = getelementptr inbounds %S, %S* %cpi, i32 0, i32 1
+  br i1 %b, label %if.then14, label %exit
+
+if.then14:
+  %ns = getelementptr inbounds %S, %S* %cpi, i32 0, i32 0
+  %l0 = load i32, i32* %ns, align 8
+  %cmp12.i = icmp sgt i32 %l0, 0
+  br i1 %cmp12.i, label %for.body.lr.ph.i, label %exit
+
+for.body.lr.ph.i:
+  %l1 = load i32, i32* %nt, align 4
+  br label %for.body.i
+
+for.body.i:
+  %phi = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc, %for.body.i ]
+  %mul.i163 = mul nsw i32 %phi, %l1
+  %cv = getelementptr inbounds %S, %S* %cpi, i32 0, i32 2, i32 %mul.i163, i32 0
+  store i32 0, i32* %cv, align 8
+  %inc = add nuw nsw i32 %phi, 1
+  %l2 = load i32, i32* %ns, align 8
+  %cmp.i164 = icmp slt i32 %inc, %l2
+  br i1 %cmp.i164, label %for.body.i, label %exit
+
+exit:
+  ret void
+}