Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -14,9 +14,9 @@ #include "polly/CodeGen/IRBuilder.h" -#include "isl/ast.h" +#include "llvm/ADT/MapVector.h" -#include +#include "isl/ast.h" namespace llvm { class SCEVExpander; @@ -81,7 +81,7 @@ class IslExprBuilder { public: /// @brief A map from isl_ids to llvm::Values. - typedef std::map IDToValueTy; + typedef llvm::MapVector IDToValueTy; /// @brief Construct an IslExprBuilder. /// @@ -125,7 +125,7 @@ private: PollyIRBuilder &Builder; - std::map &IDToValue; + IDToValueTy &IDToValue; /// @brief A SCEVExpander to translate dimension sizes to llvm values. llvm::SCEVExpander &Expander; Index: include/polly/Support/SCEVValidator.h =================================================================== --- include/polly/Support/SCEVValidator.h +++ include/polly/Support/SCEVValidator.h @@ -12,6 +12,7 @@ #ifndef POLLY_SCEV_VALIDATOR_H #define POLLY_SCEV_VALIDATOR_H +#include "llvm/ADT/SetVector.h" #include namespace llvm { @@ -19,9 +20,23 @@ class SCEV; class ScalarEvolution; class Value; +class Loop; } namespace polly { +/// @brief Find the loops referenced from a SCEV expression. +/// +/// @param Expr The SCEV expression to scan for loops. +/// @param Loops A vector into which the found loops are inserted. +void findLoops(const llvm::SCEV *Expr, + llvm::SetVector &Loops); + +/// @brief Find the values referenced by SCEVUnknowns in a given SCEV expression. +/// +/// @param Expr The SCEV expression to scan for SCEVUnknowns. +/// @param Values A vector into which the found values are inserted. +void findValues(const llvm::SCEV *Expr, llvm::SetVector &Values); + /// Returns true when the SCEV contains references to instructions within the /// region. 
/// Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -30,7 +30,11 @@ #include "polly/ScopInfo.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ScopHelper.h" +#include "polly/Support/SCEVValidator.h" #include "polly/TempScopInfo.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -56,11 +60,12 @@ class IslNodeBuilder { public: IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, - LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT) - : Builder(Builder), Annotator(Annotator), + const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, + DominatorTree &DT, Scop &S) + : S(S), Builder(Builder), Annotator(Annotator), Rewriter(new SCEVExpander(SE, "polly")), - ExprBuilder(Builder, IDToValue, *Rewriter), P(P), LI(LI), SE(SE), - DT(DT) {} + ExprBuilder(Builder, IDToValue, *Rewriter), P(P), DL(DL), LI(LI), + SE(SE), DT(DT) {} ~IslNodeBuilder() { delete Rewriter; } @@ -69,6 +74,7 @@ IslExprBuilder &getExprBuilder() { return ExprBuilder; } private: + Scop &S; PollyIRBuilder &Builder; ScopAnnotator &Annotator; @@ -77,10 +83,17 @@ IslExprBuilder ExprBuilder; Pass *P; + const DataLayout &DL; LoopInfo &LI; ScalarEvolution &SE; DominatorTree &DT; + /// @brief The current iteration of out-of-scop loops + /// + /// This map provides for a given loop a llvm::Value that contains the current + /// loop iteration. + LoopToScevMapT OutsideLoopIterations; + // This maps an isl_id* to the Value* it has in the generated program. For now // on, the only isl_ids that are stored here are the newly calculated loop // ivs. @@ -95,6 +108,12 @@ /// @param Expr The expression to code generate. 
Value *generateSCEV(const SCEV *Expr); + /// A set of Value -> Value remappings to apply when generating new code. + /// + /// When generating new code for a ScopStmt this map is used to map certain + /// llvm::Values to new llvm::Values. + ValueMapT ValueMap; + // Extract the upper bound of this loop // // The isl code generation can generate arbitrary expressions to check if the @@ -119,10 +138,49 @@ unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); + /// Compute the values and loops referenced in this subtree. + /// + /// This function looks at all ScopStmts scheduled below the provided For node + /// and finds the llvm::Value[s] and llvm::Loops[s] which are referenced but + /// not locally defined. + /// + /// Values that can be synthesized or that are available as globals are + /// considered locally defined. + /// + /// Loops that contain the scop or that are part of the scop are considered + /// locally defined. Loops that are before the scop, but do not contain the + /// scop itself are considered not locally defined. + /// + /// @param For The node defining the subtree. + /// @param Values A vector that will be filled with the Values referenced in + /// this subtree. + /// @param Loops A vector that will be filled with the Loops referenced in + /// this subtree. + void getReferencesInSubtree(__isl_keep isl_ast_node *For, + SetVector &Values, + SetVector &Loops); + + /// Change the llvm::Value(s) used for code generation. + /// + /// When generating code certain values (e.g., references to induction + /// variables or array base pointers) in the original code may be replaced by + /// new values. This function allows to (partially) update the set of values + /// used. A typical use case for this function is the case when we continue + /// code generation in a subfunction/kernel function and need to explicitly + /// pass down certain values. + /// + /// @param NewValues A map that maps certain llvm::Values to new llvm::Values. 
+ void updateValues(ParallelLoopGenerator::ValueToValueMapTy &NewValues); + void createFor(__isl_take isl_ast_node *For); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); + /// Create LLVM-IR that executes a for node thread parallel. + /// + /// @param For The FOR isl_ast_node for which code is generated. + void createForParallel(__isl_take isl_ast_node *For); + /// Generate LLVM-IR that computes the values of the original induction /// variables in function of the newly generated loop induction variables. /// @@ -238,6 +296,97 @@ return NumberOfIterations + 1; } +struct FindValuesUser { + LoopInfo &LI; + ScalarEvolution &SE; + Region &R; + SetVector &Values; + SetVector &SCEVs; +}; + +/// Extract the values and SCEVs needed to generate code for a ScopStmt. +/// +/// This function extracts a ScopStmt from a given isl_set and computes the +/// Values this statement depends on as well as a set of SCEV expressions that +/// need to be synthesized when generating code for this statement. +static int findValuesInStmt(isl_set *Set, void *User) { + isl_id *Id = isl_set_get_tuple_id(Set); + struct FindValuesUser *U = static_cast(User); + const ScopStmt *Stmt = static_cast(isl_id_get_user(Id)); + const BasicBlock *BB = Stmt->getBasicBlock(); + + // Check all the operands of instructions in the basic block. 
+ for (const Instruction &Inst : *BB) { + for (Value *SrcVal : Inst.operands()) { + if (Instruction *OpInst = dyn_cast(SrcVal)) + if (canSynthesize(OpInst, &U->LI, &U->SE, &U->R)) { + U->SCEVs.insert(U->SE.getSCEVAtScope(OpInst, U->LI.getLoopFor(BB))); + continue; + } + if (Instruction *OpInst = dyn_cast(SrcVal)) + if (Stmt->getParent()->getRegion().contains(OpInst)) + continue; + + if (isa(SrcVal) || isa(SrcVal)) + U->Values.insert(SrcVal); + } + } + isl_id_free(Id); + isl_set_free(Set); + return 0; +} + +void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, + SetVector &Values, + SetVector &Loops) { + + SetVector SCEVs; + struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs}; + + for (const auto &I : IDToValue) + Values.insert(I.second); + + for (const auto &I : OutsideLoopIterations) + Values.insert(cast(I.second)->getValue()); + + isl_union_set *Schedule = isl_union_map_domain(IslAstInfo::getSchedule(For)); + + isl_union_set_foreach_set(Schedule, findValuesInStmt, &FindValues); + isl_union_set_free(Schedule); + + for (const SCEV *Expr : SCEVs) { + findValues(Expr, Values); + findLoops(Expr, Loops); + } + + Values.remove_if([](const Value *V) { return isa(V); }); + + /// Remove loops that contain the scop or that are part of the scop, as they + /// are considered local. This leaves only loops that are before the scop, but + /// do not contain the scop itself. 
+ Loops.remove_if([this](const Loop *L) { + return this->S.getRegion().contains(L) || + L->contains(S.getRegion().getEntry()); + }); +} + +void IslNodeBuilder::updateValues( + ParallelLoopGenerator::ValueToValueMapTy &NewValues) { + SmallPtrSet Inserted; + + for (const auto &I : IDToValue) { + IDToValue[I.first] = NewValues[I.second]; + Inserted.insert(I.second); + } + + for (const auto &I : NewValues) { + if (Inserted.count(I.first)) + continue; + + ValueMap[I.first] = I.second; + } +} + void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, std::vector &IVS, __isl_take isl_id *IteratorID, @@ -315,7 +464,7 @@ llvm_unreachable("Unhandled isl_ast_node in vectorizer"); } - IDToValue.erase(IteratorID); + IDToValue.erase(IDToValue.find(IteratorID)); isl_id_free(IteratorID); isl_union_map_free(Schedule); @@ -379,7 +528,7 @@ Annotator.popLoop(Parallel); - IDToValue.erase(IteratorID); + IDToValue.erase(IDToValue.find(IteratorID)); Builder.SetInsertPoint(ExitBlock->begin()); @@ -388,6 +537,139 @@ isl_id_free(IteratorID); } +/// @brief Remove the BBs contained in a (sub)function from the dominator tree. +/// +/// This function removes the basic blocks that are part of a subfunction from +/// the dominator tree. Specifically, when generating code it may happen that at +/// some point the code generation continues in a new sub-function (e.g., when +/// generating OpenMP code). The basic blocks that are created in this +/// sub-function are then still part of the dominator tree of the original +/// function, such that the dominator tree reaches over function boundaries. +/// This is not only incorrect, but also causes crashes. This function now +/// removes from the dominator tree all basic blocks that are dominated (and +/// consequently reachable) from the entry block of this (sub)function. +/// +/// FIXME: A LLVM (function or region) pass should not touch anything outside of +/// the function/region it runs on. 
Hence, the pure need for this function shows +/// that we do not comply with this rule. At the moment, this does not cause any +/// issues, but we should be aware that such issues may appear. Unfortunately +/// the current LLVM pass infrastructure does not allow to make Polly a module +/// or call-graph pass to solve this issue, as such a pass would not have access +/// to the per-function analyses passes needed by Polly. A future pass manager +/// infrastructure is supposed to enable such kind of access possibly allowing +/// us to create a cleaner solution here. +/// +/// FIXME: Instead of adding the dominance information and then dropping it +/// later on, we should try to just not add it in the first place. This requires +/// some careful testing to make sure this does not break in interaction with +/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or +/// which may try to update it. +/// +/// @param F The function which contains the BBs to be removed. +/// @param DT The dominator tree from which to remove the BBs. +static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { + DomTreeNode *N = DT.getNode(&F->getEntryBlock()); + std::vector Nodes; + + // We can only remove an element from the dominator tree, if all its children + // have been removed. To ensure this we obtain the list of nodes to remove + // using a post-order tree traversal. 
+ for (po_iterator I = po_begin(N), E = po_end(N); I != E; ++I) + Nodes.push_back(I->getBlock()); + + for (BasicBlock *BB : Nodes) + DT.eraseNode(BB); +} + +void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { + isl_ast_node *Body; + isl_ast_expr *Init, *Inc, *Iterator, *UB; + isl_id *IteratorID; + Value *ValueLB, *ValueUB, *ValueInc; + Type *MaxType; + Value *IV; + CmpInst::Predicate Predicate; + + Body = isl_ast_node_for_get_body(For); + Init = isl_ast_node_for_get_init(For); + Inc = isl_ast_node_for_get_inc(For); + Iterator = isl_ast_node_for_get_iterator(For); + IteratorID = isl_ast_expr_get_id(Iterator); + UB = getUpperBound(For, Predicate); + + ValueLB = ExprBuilder.create(Init); + ValueUB = ExprBuilder.create(UB); + ValueInc = ExprBuilder.create(Inc); + + // OpenMP always uses SLE. In case the isl generated AST uses a SLT + // expression, we need to adjust the loop bound by one. + if (Predicate == CmpInst::ICMP_SLT) + ValueUB = Builder.CreateAdd( + ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); + + MaxType = ExprBuilder.getType(Iterator); + MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); + MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); + MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); + + if (MaxType != ValueLB->getType()) + ValueLB = Builder.CreateSExt(ValueLB, MaxType); + if (MaxType != ValueUB->getType()) + ValueUB = Builder.CreateSExt(ValueUB, MaxType); + if (MaxType != ValueInc->getType()) + ValueInc = Builder.CreateSExt(ValueInc, MaxType); + + BasicBlock::iterator LoopBody; + + SetVector SubtreeValues; + SetVector Loops; + + getReferencesInSubtree(For, SubtreeValues, Loops); + + // Create for all loops we depend on values that contain the current loop + // iteration. These values are necessary to generate code for SCEVs that + // depend on such loops. As a result we need to pass them to the subfunction. 
+ for (const Loop *L : Loops) { + const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), + SE.getUnknown(Builder.getInt64(1)), + L, SCEV::FlagAnyWrap); + Value *V = generateSCEV(OuterLIV); + OutsideLoopIterations[L] = SE.getUnknown(V); + SubtreeValues.insert(V); + } + + ParallelLoopGenerator::ValueToValueMapTy NewValues; + ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL); + + IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc, + SubtreeValues, NewValues, &LoopBody); + BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); + Builder.SetInsertPoint(LoopBody); + + // Save the current values. + ValueMapT ValueMapCopy = ValueMap; + IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; + + updateValues(NewValues); + IDToValue[IteratorID] = IV; + + create(Body); + + // Restore the original values. + ValueMap = ValueMapCopy; + IDToValue = IDToValueCopy; + + Builder.SetInsertPoint(AfterLoop); + removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); + + for (const Loop *L : Loops) + OutsideLoopIterations.erase(L); + + isl_ast_node_free(For); + isl_ast_expr_free(Iterator); + isl_id_free(IteratorID); +} + void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { bool Vector = PollyVectorizerChoice != VECTORIZER_NONE; @@ -399,6 +681,11 @@ return; } } + + if (IslAstInfo::isExecutedInParallel(For)) { + createForParallel(For); + return; + } createForSequential(For); } @@ -474,6 +761,12 @@ } } + // Add the current ValueMap to our per-statement value map. + // + // This is needed e.g. to rewrite array base addresses when moving code + // into a subfunction that is executed in parallel. 
+ VMap.insert(ValueMap.begin(), ValueMap.end()); + isl_ast_expr_free(Expr); } @@ -506,6 +799,8 @@ Id = isl_ast_expr_get_id(StmtExpr); isl_ast_expr_free(StmtExpr); + LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); + Stmt = (ScopStmt *)isl_id_get_user(Id); createSubstitutions(Expr, Stmt, VMap, LTS); @@ -558,6 +853,27 @@ isl_id_free(Id); } + // Generate values for the current loop iteration for all surrounding loops. + // + // We may also reference loops outside of the scop which do not contain the + // scop itself, but as the number of such loops may be arbitrarily large we do + // not generate code for them here, but only at the point of code generation + // where these values are needed. + Region &R = S.getRegion(); + Loop *L = LI.getLoopFor(R.getEntry()); + + while (L != nullptr && R.contains(L)) + L = L->getParentLoop(); + + while (L != nullptr) { + const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), + SE.getUnknown(Builder.getInt64(1)), + L, SCEV::FlagAnyWrap); + Value *V = generateSCEV(OuterLIV); + OutsideLoopIterations[L] = SE.getUnknown(V); + L = L->getParentLoop(); + } + isl_set_free(Context); } @@ -574,6 +890,9 @@ IslCodeGeneration() : ScopPass(ID) {} + /// @brief The datalayout used + const DataLayout *DL; + /// @name The analysis passes we need to generate code. 
/// ///{ @@ -605,6 +924,7 @@ AI = &getAnalysis(); DT = &getAnalysis().getDomTree(); SE = &getAnalysis(); + DL = &getAnalysis().getDataLayout(); assert(!S.getRegion().isTopLevelRegion() && "Top level regions are not supported"); @@ -616,7 +936,7 @@ BasicBlock *EnteringBB = simplifyRegion(&S, this); PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); - IslNodeBuilder NodeBuilder(Builder, Annotator, this, *LI, *SE, *DT); + IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); NodeBuilder.addParameters(S.getContext()); Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); @@ -630,6 +950,7 @@ virtual void printScop(raw_ostream &OS) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); Index: lib/Support/SCEVValidator.cpp =================================================================== --- lib/Support/SCEVValidator.cpp +++ lib/Support/SCEVValidator.cpp @@ -461,6 +461,48 @@ }; namespace polly { +/// Find all loops referenced in SCEVAddRecExprs. +class SCEVFindLoops { + SetVector &Loops; + +public: + SCEVFindLoops(SetVector &Loops) : Loops(Loops) {} + + bool follow(const SCEV *S) { + if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) + Loops.insert(AddRec->getLoop()); + return true; + } + bool isDone() { return false; } +}; + +void findLoops(const SCEV *Expr, SetVector &Loops) { + SCEVFindLoops FindLoops(Loops); + SCEVTraversal ST(FindLoops); + ST.visitAll(Expr); +} + +/// Find all values referenced in SCEVUnknowns. 
+class SCEVFindValues { + SetVector &Values; + +public: + SCEVFindValues(SetVector &Values) : Values(Values) {} + + bool follow(const SCEV *S) { + if (const SCEVUnknown *Unknown = dyn_cast(S)) + Values.insert(Unknown->getValue()); + return true; + } + bool isDone() { return false; } +}; + +void findValues(const SCEV *Expr, SetVector &Values) { + SCEVFindValues FindValues(Values); + SCEVTraversal ST(FindValues); + ST.visitAll(Expr); +} + bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) { return SCEVInRegionDependences::hasDependences(Expr, R); } Index: test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/loop-body-references-outer-iv.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; This code has failed the scev based code generation as the scev in the scop +; contains an AddRecExpr of an outer loop. When generating code, we did not +; properly forward the value of this expression to the subfunction. 
+ +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_for_j(c1); + +; IR: @single_parallel_loop.polly.subfn + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 + +define void @single_parallel_loop() nounwind { +entry: + br label %for.i + +for.i: + %indvar.i = phi i64 [ %indvar.i.next, %for.i.inc], [ 0, %entry ] + br label %for.j + +for.j: + %indvar.j = phi i64 [ %indvar.j.next, %for.j], [ 0, %for.i ] + %sum = add i64 %indvar.j, %indvar.i + %scevgep = getelementptr [1024 x float]* @A, i64 0, i64 %sum + store float 0.0, float *%scevgep + %indvar.j.next = add i64 %indvar.j, 1 + %exitcond.j = icmp slt i64 %indvar.j.next, 1024 + br i1 %exitcond.j, label %for.j, label %for.i.inc + +for.i.inc: + fence seq_cst + %indvar.i.next = add i64 %indvar.i, 1 + %exitcond.i = icmp ne i64 %indvar.i.next, 1024 + br i1 %exitcond.i, label %for.i, label %exit + +exit: + ret void +} Index: test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-2.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR-SCEV + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_for_i(c1); + +; IR: %[[gep:[._a-zA-Z0-9]*]] = getelementptr inbounds { [1024 x double]*, i64 }* %polly.par.userContext, i32 0, i32 1 +; IR: store i64 %extern, i64* 
%[[gep]] + +; IR-SCEV: getelementptr inbounds { [1024 x double]* }* %polly.par.userContext, i32 0, i32 0 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @kernel_trmm([1024 x double]* %B) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %extern = add i64 1, 0 + br label %for.i + +for.i: + %indvar.i = phi i64 [ %indvar.i.next, %for.i ], [ 0, %for.cond1.preheader ] + %getelementptr = getelementptr [1024 x double]* %B, i64 %extern, i64 %indvar.i + store double 0.000000e+00, double* %getelementptr + %indvar.i.next = add i64 %indvar.i, 1 + %exitcond.i = icmp ne i64 %indvar.i.next, 1024 + br i1 %exitcond.i, label %for.i, label %end + +end: + ret void +} Index: test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -basicaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; The interesting part of this test case is the instruction: +; %tmp = bitcast i8* %call to i64** +; which is not part of the scop. In the SCEV based code generation not '%tmp', +; but %call is a parameter of the SCoP and we need to make sure its value is +; properly forwarded to the subfunction. 
+ +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 < cols; c1 += 1) +; AST: Stmt_for_body(c1); + +; IR: @foo.polly.subfn + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i64 %cols, i8* noalias %call) { +entry: + %tmp = bitcast i8* %call to i64** + br label %for.body + +for.body: + %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64** %tmp, i64 0 + %tmp1 = load i64** %arrayidx, align 8 + %arrayidx.2 = getelementptr inbounds i64* %tmp1, i64 %indvar + store i64 1, i64* %arrayidx.2, align 4 + %indvar.next = add nsw i64 %indvar, 1 + %cmp = icmp slt i64 %indvar.next, %cols + br i1 %cmp, label %for.body, label %end + +end: + ret void +} + +; Another variation of this test case, now with even more of the index +; expression defined outside of the scop. + +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 < cols; c1 += 1) +; AST: Stmt_for_body(c1); + +; IR: @bar.polly.subfn + +define void @bar(i64 %cols, i8* noalias %call) { +entry: + %tmp = bitcast i8* %call to i64** + %arrayidx = getelementptr inbounds i64** %tmp, i64 0 + br label %for.body + +for.body: + %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] + %tmp1 = load i64** %arrayidx, align 8 + %arrayidx.2 = getelementptr inbounds i64* %tmp1, i64 %indvar + store i64 1, i64* %arrayidx.2, align 4 + %indvar.next = add nsw i64 %indvar, 1 + %cmp = icmp slt i64 %indvar.next, %cols + br i1 %cmp, label %for.body, label %end + +end: + ret void +} Index: test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/loop-body-references-outer-values.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR +; 
RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR + +; Make sure we correctly forward the reference to 'A' to the OpenMP subfunction. +; +; void loop_references_outer_ids(float *A) { +; for (long i = 0; i < 100; i++) +; A[i] = i; +; } + + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 99; c1 += 1) +; AST: Stmt_for_body(c1); + +; IR-LABEL: polly.start: +; IR-NEXT: %0 = bitcast { float* }* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 8, i8* %0) +; IR-NEXT: %1 = getelementptr inbounds { float* }* %polly.par.userContext, i32 0, i32 0 +; IR-NEXT: store float* %A, float** %1 +; IR-NEXT: %polly.par.userContext1 = bitcast { float* }* %polly.par.userContext to i8* + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @loop_references_outer_ids(float* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i64 %i.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = sitofp i64 %i.0 to float + %arrayidx = getelementptr inbounds float* %A, i64 %i.0 + store float %conv, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/loop-bounds-reference-outer-ids.ll @@ -0,0 +1,102 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev < %s | FileCheck %s -check-prefix=IR +; +; float 
A[100]; +; +; void loop_references_outer_ids(long n) { +; for (long i = 0; i < 100; i++) +; for (long j = 0; j < 100; j++) +; for (long k = 0; k < n + i; k++) +; A[j] += i + j + k; +; } + +; In this test case we verify that the j-loop is generated as OpenMP parallel +; loop and that the values of 'i' and 'n', needed in the loop bounds of the +; k-loop, are correctly passed to the subfunction. + +; AST: #pragma minimal dependence distance: 1 +; AST: for (int c1 = max(0, -n + 1); c1 <= 99; c1 += 1) +; AST: #pragma omp parallel for +; AST: for (int c3 = 0; c3 <= 99; c3 += 1) +; AST: #pragma minimal dependence distance: 1 +; AST: for (int c5 = 0; c5 < n + c1; c5 += 1) +; AST: Stmt_for_body6(c1, c3, c5); + +; IR: %polly.par.userContext = alloca { i64, i64 } +; IR: %4 = bitcast { i64, i64 }* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 16, i8* %4) +; IR-NEXT: %5 = getelementptr inbounds { i64, i64 }* %polly.par.userContext, i32 0, i32 0 +; IR-NEXT: store i64 %n, i64* %5 +; IR-NEXT: %6 = getelementptr inbounds { i64, i64 }* %polly.par.userContext, i32 0, i32 1 +; IR-NEXT: store i64 %polly.indvar, i64* %6 +; IR-NEXT: %polly.par.userContext1 = bitcast { i64, i64 }* %polly.par.userContext to i8* + +; IR-LABEL: @loop_references_outer_ids.polly.subfn(i8* %polly.par.userContext) +; IR: %polly.par.userContext1 = bitcast i8* %polly.par.userContext to { i64, i64 }* +; IR-NEXT: %0 = getelementptr inbounds { i64, i64 }* %polly.par.userContext1, i32 0, i32 0 +; IR-NEXT: %1 = load i64* %0 +; IR-NEXT: %2 = getelementptr inbounds { i64, i64 }* %polly.par.userContext1, i32 0, i32 1 +; IR-NEXT: %3 = load i64* %2 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A = common global [100 x float] zeroinitializer, align 16 + +define void @loop_references_outer_ids(i64 %n) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc13, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ] + %exitcond1 = icmp ne i64 %i.0, 100 + br i1 
%exitcond1, label %for.body, label %for.end15 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc10, %for.body + %j.0 = phi i64 [ 0, %for.body ], [ %inc11, %for.inc10 ] + %exitcond = icmp ne i64 %j.0, 100 + br i1 %exitcond, label %for.body3, label %for.end12 + +for.body3: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc, %for.body3 + %k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ] + %add = add nsw i64 %i.0, %n + %cmp5 = icmp slt i64 %k.0, %add + br i1 %cmp5, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4 + %add7 = add nsw i64 %i.0, %j.0 + %add8 = add nsw i64 %add7, %k.0 + %conv = sitofp i64 %add8 to float + %arrayidx = getelementptr inbounds [100 x float]* @A, i64 0, i64 %j.0 + %tmp = load float* %arrayidx, align 4 + %add9 = fadd float %tmp, %conv + store float %add9, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body6 + %inc = add nsw i64 %k.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond4 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %inc11 = add nsw i64 %j.0, 1 + br label %for.cond1 + +for.end12: ; preds = %for.cond1 + br label %for.inc13 + +for.inc13: ; preds = %for.end12 + %inc14 = add nsw i64 %i.0, 1 + br label %for.cond + +for.end15: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/OpenMP/reference-other-bb.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/reference-other-bb.ll @@ -0,0 +1,27 @@ +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; IR: @foo.polly.subfn +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i32 %sendcount, i8* %recvbuf) { +entry: + br label %sw.bb3 + +sw.bb3: + %tmp = bitcast i8* %recvbuf to double* + %cmp75 = icmp sgt i32 %sendcount, 0 + br i1 %cmp75, label 
%for.body, label %end + +for.body: + %i.16 = phi i32 [ %inc14, %for.body ], [ 0, %sw.bb3 ] + %idxprom11 = sext i32 %i.16 to i64 + %arrayidx12 = getelementptr inbounds double* %tmp, i64 %idxprom11 + store double 1.0, double* %arrayidx12, align 8 + %inc14 = add nsw i32 %i.16, 1 + %cmp7 = icmp slt i32 %inc14, %sendcount + br i1 %cmp7, label %for.body, label %end + +end: + ret void +} Index: test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/reference-preceeding-loop.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + + +; - Test the case where scalar evolution references a loop that is outside +; of the scop, but does not contain the scop. +; - Test the case where two parallel subfunctions are created. 
+ +; AST: if (symbol >= p_2 + 1) { +; AST-NEXT: #pragma simd +; AST-NEXT: #pragma omp parallel for +; AST-NEXT: for (int c1 = 0; c1 < p_0 + symbol; c1 += 1) +; AST-NEXT: Stmt_while_body(c1); +; AST-NEXT: } else +; AST-NEXT: #pragma simd +; AST-NEXT: #pragma omp parallel for +; AST-NEXT: for (int c1 = 0; c1 <= p_0 + p_2; c1 += 1) +; AST-NEXT: Stmt_while_body(c1); + +; IR: @update_model.polly.subfn +; IR: @update_model.polly.subfn1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@cum_freq = external global [258 x i64], align 16 + +define void @update_model(i64 %symbol) { +entry: + br label %for.one + +for.one: + %i.1 = phi i64 [ %dec17, %for.one ], [ %symbol, %entry ] + %dec17 = add nsw i64 %i.1, -1 + br i1 undef, label %for.one, label %while.body + +while.body: + %indvar = phi i64 [ %sub42, %while.body ], [ %i.1, %for.one ] + %sub42 = add nsw i64 %indvar, -1 + %arrayidx44 = getelementptr inbounds [258 x i64]* @cum_freq, i64 0, i64 %sub42 + store i64 1, i64* %arrayidx44, align 4 + %cmp40 = icmp sgt i64 %sub42, 0 + br i1 %cmp40, label %while.body, label %while.end + +while.end: + ret void +} Index: test/Isl/CodeGen/OpenMP/single_loop.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/single_loop.ll @@ -0,0 +1,118 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -S < %s | FileCheck %s 
-check-prefix=IR-STRIDE4 +; RUN: opt %loadPolly -polly-parallel -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s -check-prefix=IR-STRIDE4 + +; This extensive test case tests the creation of the full set of OpenMP calls +; as well as the subfunction creation using a trivial loop as example. + +; #define N 1024 +; float A[N]; +; +; void single_parallel_loop(void) { +; for (long i = 0; i < N; i++) +; A[i] = 1; +; } + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_S(c1); + +; AST-STRIDE4: #pragma omp parallel for +; AST-STRIDE4: for (int c1 = 0; c1 <= 1023; c1 += 4) +; AST-STRIDE4: #pragma simd +; AST-STRIDE4: for (int c2 = c1; c2 <= c1 + 3; c2 += 1) +; AST-STRIDE4: Stmt_S(c2); + +; IR-LABEL: single_parallel_loop() +; IR-NEXT: entry +; IR-NEXT: %polly.par.userContext = alloca + +; IR-LABEL: polly.start: +; IR-NEXT: %0 = bitcast {}* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.start(i64 0, i8* %0) +; IR-NEXT: %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8* +; IR-NEXT: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1) +; IR-NEXT: call void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext1) +; IR-NEXT: call void @GOMP_parallel_end() +; IR-NEXT: %1 = bitcast {}* %polly.par.userContext to i8* +; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %1) +; IR-NEXT: br label %polly.merge_new_and_old + +; IR: define internal void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext) #1 +; IR-LABEL: polly.par.setup: +; IR-NEXT: %polly.par.LBPtr = alloca i64 +; IR-NEXT: %polly.par.UBPtr = alloca i64 +; IR-NEXT: %polly.par.userContext1 = +; IR: br label %polly.par.checkNext + +; IR-LABEL: polly.par.exit: +; IR-NEXT: call void @GOMP_loop_end_nowait() +; IR-NEXT: ret void + +; IR-LABEL: 
polly.par.checkNext: +; IR-NEXT: %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr) +; IR-NEXT: %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0 +; IR-NEXT: br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit + +; IR-LABEL: polly.par.loadIVBounds: +; IR-NEXT: %polly.par.LB = load i64* %polly.par.LBPtr +; IR-NEXT: %polly.par.UB = load i64* %polly.par.UBPtr +; IR-NEXT: %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1 +; IR-NEXT: br label %polly.loop_preheader + +; IR-LABEL: polly.loop_exit: +; IR-NEXT: br label %polly.par.checkNext + +; IR-LABEL: polly.loop_header: +; IR-NEXT: %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ] +; IR-NEXT: br label %polly.stmt.S + +; IR-LABEL: polly.stmt.S: +; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar +; IR-NEXT: store float 1.000000e+00, float* %[[gep]] +; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; IR-NEXT: %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 1 +; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub +; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit + +; IR-LABEL: polly.loop_preheader: +; IR-NEXT: br label %polly.loop_header + +; IR: attributes #1 = { "polly.skip.fn" } + +; IR-STRIDE4: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4) +; IR-STRIDE4: add nsw i64 %polly.indvar, 3 +; IR-STRIDE4: %polly.indvar_next = add nsw i64 %polly.indvar, 4 +; IR-STRIDE4: %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, 
align 16 + +define void @single_parallel_loop() nounwind { +entry: + br label %for.i + +for.i: + %indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ] + %scevgep = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %S, label %exit + +S: + store float 1.0, float* %scevgep + br label %for.inc + +for.inc: + %indvar.next = add i64 %indvar, 1 + br label %for.i + +exit: + ret void +} Index: test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-ast -analyze < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR +; RUN: opt %loadPolly -tbaa -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; #define N 1024 +; float A[N]; +; +; void single_parallel_loop(void) { +; for (long i = 0; i < N; i++) +; A[i] = 1; +; } + +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= 1023; c1 += 1) +; AST: Stmt_S(c1); + +; IR: @single_parallel_loop.polly.subfn +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @single_parallel_loop(float** %A) nounwind { +entry: + br label %for.i + +for.i: + %indvar = phi i64 [ %indvar.next, %for.inc], [ 0, %entry ] + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %S, label %exit + +S: + %ptr = load float** %A, !tbaa !2 + %scevgep = getelementptr float* %ptr, i64 %indvar + %val = load float* %scevgep, !tbaa !6 + %sum = fadd float %val, 1.0 + store float %sum, float* %scevgep, 
!tbaa !6 + br label %for.inc + +for.inc: + %indvar.next = add i64 %indvar, 1 + br label %for.i + +exit: + ret void +} + +!2 = metadata !{metadata !"float", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!6 = metadata !{metadata !"float *ptr", metadata !3, i64 0} Index: test/Isl/CodeGen/OpenMP/single_parallel_loop___%for.i---%exit.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/single_parallel_loop___%for.i---%exit.jscop @@ -0,0 +1,17 @@ +{ + "context" : "{ : }", + "name" : "for.i => exit", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "{ Stmt_S[i0] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_S[i0] : i0 >= 0 and i0 <= 1023 }", + "name" : "Stmt_S", + "schedule" : "{ Stmt_S[i0] -> scattering[0, floor(i0/4) * 4, i0] }" + } + ] +} Index: test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-parallel -polly-ast -analyze -polly-codegen-scev < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-parallel -polly-codegen-isl -S -polly-codegen-scev -verify-dom-info < %s | FileCheck %s -check-prefix=IR + +; This test case verifies that we create correct code even if two OpenMP loops +; share common outer variables. 
+ +; AST: if (nj >= p_1 + 3) { +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 < p_0 + nj - 1; c1 += 1) +; AST: Stmt_for_body35(c1); +; AST: } else +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c1 = 0; c1 <= p_0 + p_1; c1 += 1) +; AST: Stmt_for_body35(c1); + +; IR: @foo.polly.subfn +; IR: @foo.polly.subfn1 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i64 %nj, [512 x double]* %R) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %k.014 = phi i64 [ %inc87, %for.inc86 ], [ 0, %entry ] + %j.010 = add nsw i64 %k.014, 1 + br i1 undef, label %for.body35, label %for.inc86 + +for.body35: + %j.012 = phi i64 [ %j.0, %for.body35 ], [ %j.010, %for.cond1.preheader ] + %arrayidx39 = getelementptr inbounds [512 x double]* %R, i64 0, i64 %j.012 + store double 0.000000e+00, double* %arrayidx39 + %j.0 = add nsw i64 %j.012, 1 + %cmp34 = icmp slt i64 %j.0, %nj + br i1 %cmp34, label %for.body35, label %for.inc86 + +for.inc86: + %inc87 = add nsw i64 %k.014, 1 + br i1 undef, label %for.cond1.preheader, label %for.end88 + +for.end88: + ret void +}