Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Standalone View
lib/CodeGen/IslCodeGeneration.cpp
Show All 24 Lines | |||||
#include "polly/CodeGen/IslAst.h" | #include "polly/CodeGen/IslAst.h" | ||||
#include "polly/CodeGen/LoopGenerators.h" | #include "polly/CodeGen/LoopGenerators.h" | ||||
#include "polly/CodeGen/Utils.h" | #include "polly/CodeGen/Utils.h" | ||||
#include "polly/Dependences.h" | #include "polly/Dependences.h" | ||||
#include "polly/LinkAllPasses.h" | #include "polly/LinkAllPasses.h" | ||||
#include "polly/ScopInfo.h" | #include "polly/ScopInfo.h" | ||||
#include "polly/Support/GICHelper.h" | #include "polly/Support/GICHelper.h" | ||||
#include "polly/Support/ScopHelper.h" | #include "polly/Support/ScopHelper.h" | ||||
#include "polly/Support/SCEVValidator.h" | |||||
#include "polly/TempScopInfo.h" | #include "polly/TempScopInfo.h" | ||||
#include "llvm/ADT/PostOrderIterator.h" | |||||
#include "llvm/ADT/SmallPtrSet.h" | |||||
#include "llvm/Analysis/LoopInfo.h" | #include "llvm/Analysis/LoopInfo.h" | ||||
#include "llvm/Analysis/PostDominators.h" | #include "llvm/Analysis/PostDominators.h" | ||||
#include "llvm/Analysis/ScalarEvolutionExpander.h" | #include "llvm/Analysis/ScalarEvolutionExpander.h" | ||||
#include "llvm/IR/Module.h" | #include "llvm/IR/Module.h" | ||||
#include "llvm/Support/CommandLine.h" | #include "llvm/Support/CommandLine.h" | ||||
#include "llvm/Support/Debug.h" | #include "llvm/Support/Debug.h" | ||||
#include "llvm/IR/DataLayout.h" | #include "llvm/IR/DataLayout.h" | ||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||
Show All 9 Lines | |||||
using namespace polly; | using namespace polly; | ||||
using namespace llvm; | using namespace llvm; | ||||
#define DEBUG_TYPE "polly-codegen-isl" | #define DEBUG_TYPE "polly-codegen-isl" | ||||
class IslNodeBuilder { | class IslNodeBuilder { | ||||
public: | public: | ||||
IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, | IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, | ||||
LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT) | const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, | ||||
: Builder(Builder), Annotator(Annotator), | DominatorTree &DT, Scop &S) | ||||
: S(S), Builder(Builder), Annotator(Annotator), | |||||
Rewriter(new SCEVExpander(SE, "polly")), | Rewriter(new SCEVExpander(SE, "polly")), | ||||
ExprBuilder(Builder, IDToValue, *Rewriter), P(P), LI(LI), SE(SE), | ExprBuilder(Builder, IDToValue, *Rewriter), P(P), DL(DL), LI(LI), | ||||
DT(DT) {} | SE(SE), DT(DT) {} | ||||
~IslNodeBuilder() { delete Rewriter; } | ~IslNodeBuilder() { delete Rewriter; } | ||||
void addParameters(__isl_take isl_set *Context); | void addParameters(__isl_take isl_set *Context); | ||||
void create(__isl_take isl_ast_node *Node); | void create(__isl_take isl_ast_node *Node); | ||||
IslExprBuilder &getExprBuilder() { return ExprBuilder; } | IslExprBuilder &getExprBuilder() { return ExprBuilder; } | ||||
private: | private: | ||||
Scop &S; | |||||
PollyIRBuilder &Builder; | PollyIRBuilder &Builder; | ||||
ScopAnnotator &Annotator; | ScopAnnotator &Annotator; | ||||
/// @brief A SCEVExpander to create llvm values from SCEVs. | /// @brief A SCEVExpander to create llvm values from SCEVs. | ||||
SCEVExpander *Rewriter; | SCEVExpander *Rewriter; | ||||
IslExprBuilder ExprBuilder; | IslExprBuilder ExprBuilder; | ||||
Pass *P; | Pass *P; | ||||
const DataLayout &DL; | |||||
LoopInfo &LI; | LoopInfo &LI; | ||||
ScalarEvolution &SE; | ScalarEvolution &SE; | ||||
DominatorTree &DT; | DominatorTree &DT; | ||||
/// @brief The current iteration of out-of-scop loops | |||||
simbuerg: out-of-scop | |||||
Not Done ReplyInline ActionsFixed. grosser: Fixed. | |||||
/// | |||||
/// This map provides for a given loop a llvm::Value that contains the current | |||||
/// loop iteration. | |||||
LoopToScevMapT OutsideLoopIterations; | |||||
// This maps an isl_id* to the Value* it has in the generated program. For now | // This maps an isl_id* to the Value* it has in the generated program. For now | ||||
// on, the only isl_ids that are stored here are the newly calculated loop | // on, the only isl_ids that are stored here are the newly calculated loop | ||||
// ivs. | // ivs. | ||||
IslExprBuilder::IDToValueTy IDToValue; | IslExprBuilder::IDToValueTy IDToValue; | ||||
/// Generate code for a given SCEV* | /// Generate code for a given SCEV* | ||||
/// | /// | ||||
/// This function generates code for a given SCEV expression. It generated | /// This function generates code for a given SCEV expression. It generated | ||||
/// code is emmitted at the end of the basic block our Builder currently | /// code is emmitted at the end of the basic block our Builder currently | ||||
/// points to and the resulting value is returned. | /// points to and the resulting value is returned. | ||||
Not Done ReplyInline ActionsI don't like the comment (style + content). It doesn't give any more information than the type of the map. jdoerfert: I don't like the comment (style + content). It doesn't give any more information than the type… | |||||
/// | /// | ||||
/// @param Expr The expression to code generate. | /// @param Expr The expression to code generate. | ||||
Value *generateSCEV(const SCEV *Expr); | Value *generateSCEV(const SCEV *Expr); | ||||
/// A set of Value -> Value remappings to apply when generating new code. | |||||
Not Done ReplyInline Actionsone more / please jdoerfert: one more / please | |||||
Not Done ReplyInline ActionsFixed. grosser: Fixed. | |||||
/// | |||||
/// When generating new code for a ScopStmt this map is used to map certain | |||||
/// llvm::Values to new llvm::Values. | |||||
ValueMapT ValueMap; | |||||
// Extract the upper bound of this loop | // Extract the upper bound of this loop | ||||
// | // | ||||
// The isl code generation can generate arbitrary expressions to check if the | // The isl code generation can generate arbitrary expressions to check if the | ||||
// upper bound of a loop is reached, but it provides an option to enforce | // upper bound of a loop is reached, but it provides an option to enforce | ||||
// 'atomic' upper bounds. An 'atomic upper bound is always of the form | // 'atomic' upper bounds. An 'atomic upper bound is always of the form | ||||
// iv <= expr, where expr is an (arbitrary) expression not containing iv. | // iv <= expr, where expr is an (arbitrary) expression not containing iv. | ||||
// | // | ||||
// This function extracts 'atomic' upper bounds. Polly, in general, requires | // This function extracts 'atomic' upper bounds. Polly, in general, requires | ||||
// atomic upper bounds for the following reasons: | // atomic upper bounds for the following reasons: | ||||
// | // | ||||
// 1. An atomic upper bound is loop invariant | // 1. An atomic upper bound is loop invariant | ||||
// | // | ||||
// It must not be calculated at each loop iteration and can often even be | // It must not be calculated at each loop iteration and can often even be | ||||
// hoisted out further by the loop invariant code motion. | // hoisted out further by the loop invariant code motion. | ||||
// | // | ||||
// 2. OpenMP needs a loop invarient upper bound to calculate the number | // 2. OpenMP needs a loop invarient upper bound to calculate the number | ||||
// of loop iterations. | // of loop iterations. | ||||
// | // | ||||
// 3. With the existing code, upper bounds have been easier to implement. | // 3. With the existing code, upper bounds have been easier to implement. | ||||
__isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For, | __isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For, | ||||
CmpInst::Predicate &Predicate); | CmpInst::Predicate &Predicate); | ||||
unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); | unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); | ||||
Not Done ReplyInline ActionsPlease, do not use this awfull OMP any more but talk about parallel function arguments or sth. similar. (also documentation) jdoerfert: Please, do not use this awfull OMP any more but talk about parallel function arguments or sth. | |||||
/// Compute the values and loops referenced in this subtree. | |||||
Not Done ReplyInline ActionsWhen I see the type here I remember my patch to generalize the code generation (LoopGenerators). I hoped you would review that and work on that as a base. Any reasons not to? jdoerfert: When I see the type here I remember my patch to generalize the code generation (LoopGenerators). | |||||
/// | |||||
/// This function looks at all ScopStmts scheduled below the provided For node | |||||
/// and finds the llvm::Value[s] and llvm::Loops[s] which are referenced but | |||||
/// not locally defined. | |||||
/// | |||||
/// Values that can be synthesized or that are available as globals are | |||||
/// considered locally defined. | |||||
/// | |||||
/// Loops that contain the scop or that are part of the scop are considered | |||||
/// locally defined. Loops that are before the scop, but do not contain the | |||||
Not Done ReplyInline ActionsWhat loop are left? jdoerfert: What loop are left? | |||||
Not Done ReplyInline ActionsI added: "Loops that are before the scop, but do not contain the scop itself are considered not locally defined." grosser: I added:
"Loops that are before the scop, but do not contain the scop itself are considered… | |||||
/// scop itself are considered not locally defined. | |||||
/// | |||||
/// @param For The node defining the subtree. | |||||
/// @param Values A vector that will be filled with the Values referenced in | |||||
/// this subtree. | |||||
/// @param Loops A vector that will be filled with the Loops referenced in | |||||
/// this subtree. | |||||
void getReferencesInSubtree(__isl_keep isl_ast_node *For, | |||||
SetVector<Value *> &Values, | |||||
SetVector<const Loop *> &Loops); | |||||
/// Change the llvm::Value(s) used for code generation. | |||||
/// | |||||
/// When generating code certain values (e.g., references to induction | |||||
/// variables or array base pointers) in the original code may be replaced by | |||||
/// new values. This function allows to (partially) update the set of values | |||||
/// used. A typical use case for this function is the case when we continue | |||||
/// code generation in a subfunction/kernel function and need to explicitly | |||||
/// pass down certain values. | |||||
/// | |||||
/// @param NewValues A map that maps certain llvm::Values to new llvm::Values. | |||||
void updateValues(ParallelLoopGenerator::ValueToValueMapTy &NewValues); | |||||
void createFor(__isl_take isl_ast_node *For); | void createFor(__isl_take isl_ast_node *For); | ||||
void createForVector(__isl_take isl_ast_node *For, int VectorWidth); | void createForVector(__isl_take isl_ast_node *For, int VectorWidth); | ||||
void createForSequential(__isl_take isl_ast_node *For); | void createForSequential(__isl_take isl_ast_node *For); | ||||
Not Done ReplyInline Actionsisn't createForParallel much nicer? jdoerfert: isn't createForParallel much nicer? | |||||
Not Done ReplyInline Actionsdocu? jdoerfert: docu? | |||||
Not Done ReplyInline ActionsI added: + / Create LLVM-IR that executes a for node thread parallel. grosser: I added:
+ /// Create LLVM-IR that executes a for node thread parallel.
+ ///
+ /// @param… | |||||
/// Create LLVM-IR that executes a for node thread parallel. | |||||
/// | |||||
/// @param For The FOR isl_ast_node for which code is generated. | |||||
void createForParallel(__isl_take isl_ast_node *For); | |||||
/// Generate LLVM-IR that computes the values of the original induction | /// Generate LLVM-IR that computes the values of the original induction | ||||
/// variables in function of the newly generated loop induction variables. | /// variables in function of the newly generated loop induction variables. | ||||
/// | /// | ||||
/// Example: | /// Example: | ||||
/// | /// | ||||
/// // Original | /// // Original | ||||
/// for i | /// for i | ||||
/// for j | /// for j | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { | ||||
isl_union_map *Schedule = IslAstInfo::getSchedule(For); | isl_union_map *Schedule = IslAstInfo::getSchedule(For); | ||||
isl_set *LoopDomain = isl_set_from_union_set(isl_union_map_range(Schedule)); | isl_set *LoopDomain = isl_set_from_union_set(isl_union_map_range(Schedule)); | ||||
int NumberOfIterations = polly::getNumberOfIterations(LoopDomain); | int NumberOfIterations = polly::getNumberOfIterations(LoopDomain); | ||||
if (NumberOfIterations == -1) | if (NumberOfIterations == -1) | ||||
return -1; | return -1; | ||||
return NumberOfIterations + 1; | return NumberOfIterations + 1; | ||||
} | } | ||||
struct FindValuesUser { | |||||
Not Done ReplyInline ActionsI usually do not like this Object producing methodes, but prefere the object to be passed as reference instead, especially for Vectors/Sets/Maps/etc. jdoerfert: I usually do not like this Object producing methodes, but prefere the object to be passed as… | |||||
LoopInfo &LI; | |||||
ScalarEvolution &SE; | |||||
Region &R; | |||||
SetVector<Value *> &Values; | |||||
SetVector<const SCEV *> &SCEVs; | |||||
}; | |||||
/// Extract the values and SCEVs needed to generate code for a ScopStmt. | |||||
Not Done ReplyInline ActionsAny reason not to make this a static function? I'm not sure about such lambda's myself and I think we always used static functions so far. jdoerfert: Any reason not to make this a static function? I'm not sure about such lambda's myself and I… | |||||
Not Done ReplyInline ActionsExtract the values and SCEVs needed to generate code for a ScopStmt. simbuerg: Extract the values and SCEVs needed to generate code for a ScopStmt. | |||||
Not Done ReplyInline ActionsFixed. grosser: Fixed. | |||||
/// | |||||
/// This function extracts a ScopStmt from a given isl_set and computes the | |||||
Not Done ReplyInline ActionsI don't really see the benefit of static_cast here as User is a black box void*, however it might be good to start using static_cast instead of C-style casts all the time. jdoerfert: I don't really see the benefit of static_cast here as User is a black box void*, however it… | |||||
/// Values this statement depends on as well as a set of SCEV expressions that | |||||
/// need to be synthesized when generating code for this statment. | |||||
Not Done ReplyInline Actions... when generating code for this statement. simbuerg: ... when generating code for this statement. | |||||
Not Done ReplyInline ActionsFixed. grosser: Fixed. | |||||
static int findValuesInStmt(isl_set *Set, void *User) { | |||||
isl_id *Id = isl_set_get_tuple_id(Set); | |||||
struct FindValuesUser *U = static_cast<struct FindValuesUser *>(User); | |||||
Not Done ReplyInline ActionsThe name is not really descriptive. jdoerfert: The name is not really descriptive. | |||||
Not Done ReplyInline ActionsI forgot to add this to the patch, but I now renamed this to void *UserPtr and struct FindValuesUser *User grosser: I forgot to add this to the patch, but I now renamed this to
void *UserPtr and struct… | |||||
const ScopStmt *Stmt = static_cast<const ScopStmt *>(isl_id_get_user(Id)); | |||||
const BasicBlock *BB = Stmt->getBasicBlock(); | |||||
Not Done ReplyInline ActionsThat is probably not going to work. The instruction might be in the SCoP but outside the parallel loop, thus still needed as an argument for the parallel subfunction. If you agree, then I suggest to use the union_set_foreach_set only to collect all Stmts/BBs inside of For and then iterate again over them. This time we do the instruction operand checks but we check if the parent of an instruction operand is in the set of basic blocks we collected earlier. This way we should be able to identify all inner SCoP instructions we need to pass as arguments. jdoerfert: That is probably not going to work. The instruction might be in the SCoP but outside the… | |||||
// Check all the operands of instructions in the basic block. | |||||
for (const Instruction &Inst : *BB) { | |||||
for (Value *SrcVal : Inst.operands()) { | |||||
Not Done ReplyInline ActionsI would like the negated check better: if (isa<Constant> || isa<Global>) or canSynthesise(...) then continue, otherwise assume we need it. jdoerfert: I would like the negated check better:
if (isa<Constant> || isa<Global>)
or
canSynthesise(.. | |||||
if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal)) | |||||
if (canSynthesize(OpInst, &U->LI, &U->SE, &U->R)) { | |||||
U->SCEVs.insert(U->SE.getSCEVAtScope(OpInst, U->LI.getLoopFor(BB))); | |||||
continue; | |||||
} | |||||
if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal)) | |||||
if (Stmt->getParent()->getRegion().contains(OpInst)) | |||||
continue; | |||||
if (isa<Instruction>(SrcVal) || isa<Argument>(SrcVal)) | |||||
U->Values.insert(SrcVal); | |||||
} | |||||
} | |||||
isl_id_free(Id); | |||||
isl_set_free(Set); | |||||
Not Done ReplyInline ActionsWhy back to std::set? jdoerfert: Why back to std::set? | |||||
return 0; | |||||
} | |||||
void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, | |||||
SetVector<Value *> &Values, | |||||
SetVector<const Loop *> &Loops) { | |||||
SetVector<const SCEV *> SCEVs; | |||||
Not Done ReplyInline ActionsWe could remove the need for the extra set by looking up I.first in IDToValue, if found we continue otherwise we go on. jdoerfert: We could remove the need for the extra set by looking up I.first in IDToValue, if found we… | |||||
struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs}; | |||||
for (const auto &I : IDToValue) | |||||
Values.insert(I.second); | |||||
for (const auto &I : OutsideLoopIterations) | |||||
Values.insert(cast<SCEVUnknown>(I.second)->getValue()); | |||||
isl_union_set *Schedule = isl_union_map_domain(IslAstInfo::getSchedule(For)); | |||||
isl_union_set_foreach_set(Schedule, findValuesInStmt, &FindValues); | |||||
isl_union_set_free(Schedule); | |||||
for (const SCEV *Expr : SCEVs) { | |||||
findValues(Expr, Values); | |||||
findLoops(Expr, Loops); | |||||
} | |||||
Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); | |||||
Not Done ReplyInline ActionsI'm still not really convinced this is better/nicer than a static function. jdoerfert: I'm still not really convinced this is better/nicer than a static function. | |||||
Not Done ReplyInline ActionsMost likely the static function would get a descriptive name, such as: isGlobalValue, right? That is quite redundant given the only statement this function executes is: return isa<GlobalValue>(V); Furthermore, I do not think that this static function would get called from somewhere else in here, so whats the point in having it around on the top-level. simbuerg: Most likely the static function would get a descriptive name, such as: isGlobalValue, right? | |||||
Not Done ReplyInline ActionsI left the lambda for now. Similar to Andreas I have the impression that this as good as we can find a use case for lambdas. Are you proposing to generally avoid lambdas? grosser: I left the lambda for now.
Similar to Andreas I have the impression that this as good as we… | |||||
/// Remove loops that contain the scop or that are part of the scop, as they | |||||
/// are considered local. This leaves only loops that are before the scop, but | |||||
/// do not contain the scop itself. | |||||
Not Done ReplyInline ActionsI don't understand this. Can you add a comment, otherwise this is a magic condition that cannot be maintained or debuged. jdoerfert: I don't understand this. Can you add a comment, otherwise this is a magic condition that cannot… | |||||
Not Done ReplyInline ActionsThis was already indirectly documented in the header of the surrounding function. For clarity I added a brief comment right above the stmt: + / Remove loops that contain the scop or that are part of the scop, as they grosser: This was already indirectly documented in the header of the surrounding function. For clarity I… | |||||
Loops.remove_if([this](const Loop *L) { | |||||
return this->S.getRegion().contains(L) || | |||||
L->contains(S.getRegion().getEntry()); | |||||
}); | |||||
} | |||||
Not Done ReplyInline ActionsI usually like the SmallPtrSet but that's taste I guess. jdoerfert: I usually like the SmallPtrSet but that's taste I guess. | |||||
Not Done ReplyInline ActionsChanged. grosser: Changed. | |||||
void IslNodeBuilder::updateValues( | |||||
ParallelLoopGenerator::ValueToValueMapTy &NewValues) { | |||||
SmallPtrSet<Value *, 5> Inserted; | |||||
for (const auto &I : IDToValue) { | |||||
IDToValue[I.first] = NewValues[I.second]; | |||||
Inserted.insert(I.second); | |||||
} | |||||
for (const auto &I : NewValues) { | |||||
if (Inserted.count(I.first)) | |||||
continue; | |||||
ValueMap[I.first] = I.second; | |||||
} | |||||
} | |||||
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, | void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, | ||||
std::vector<Value *> &IVS, | std::vector<Value *> &IVS, | ||||
__isl_take isl_id *IteratorID, | __isl_take isl_id *IteratorID, | ||||
__isl_take isl_union_map *Schedule) { | __isl_take isl_union_map *Schedule) { | ||||
isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); | isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); | ||||
isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); | isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); | ||||
isl_id *Id = isl_ast_expr_get_id(StmtExpr); | isl_id *Id = isl_ast_expr_get_id(StmtExpr); | ||||
isl_ast_expr_free(StmtExpr); | isl_ast_expr_free(StmtExpr); | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | case isl_ast_node_block: { | ||||
isl_ast_node_list_free(List); | isl_ast_node_list_free(List); | ||||
break; | break; | ||||
} | } | ||||
default: | default: | ||||
isl_ast_node_dump(Body); | isl_ast_node_dump(Body); | ||||
llvm_unreachable("Unhandled isl_ast_node in vectorizer"); | llvm_unreachable("Unhandled isl_ast_node in vectorizer"); | ||||
} | } | ||||
IDToValue.erase(IteratorID); | IDToValue.erase(IDToValue.find(IteratorID)); | ||||
isl_id_free(IteratorID); | isl_id_free(IteratorID); | ||||
isl_union_map_free(Schedule); | isl_union_map_free(Schedule); | ||||
isl_ast_node_free(For); | isl_ast_node_free(For); | ||||
isl_ast_expr_free(Iterator); | isl_ast_expr_free(Iterator); | ||||
} | } | ||||
void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) { | void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) { | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) { | ||||
IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, LI, DT, ExitBlock, | IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, LI, DT, ExitBlock, | ||||
Predicate, &Annotator, Parallel, UseGuardBB); | Predicate, &Annotator, Parallel, UseGuardBB); | ||||
IDToValue[IteratorID] = IV; | IDToValue[IteratorID] = IV; | ||||
create(Body); | create(Body); | ||||
Annotator.popLoop(Parallel); | Annotator.popLoop(Parallel); | ||||
IDToValue.erase(IteratorID); | IDToValue.erase(IDToValue.find(IteratorID)); | ||||
Builder.SetInsertPoint(ExitBlock->begin()); | Builder.SetInsertPoint(ExitBlock->begin()); | ||||
isl_ast_node_free(For); | isl_ast_node_free(For); | ||||
isl_ast_expr_free(Iterator); | isl_ast_expr_free(Iterator); | ||||
isl_id_free(IteratorID); | isl_id_free(IteratorID); | ||||
} | } | ||||
/// @brief Remove the BBs contained in a (sub)function from the dominator tree. | |||||
/// | |||||
/// This function removes the basic blocks that are part of a subfunction from | |||||
Not Done ReplyInline ActionsBack to stdlib... jdoerfert: Back to stdlib... | |||||
/// the dominator tree. Specifically, when generating code it may happen that at | |||||
Not Done ReplyInline ActionsThis is hard to understand. If I got it correctly (but please tell me if I'm wrong), you collect all nodes of a function in post order in a set and erase them afterwards from the dominator tree of the function, right? jdoerfert: This is hard to understand. If I got it correctly (but please tell me if I'm wrong), you… | |||||
/// some point the code generation continues in a new sub-function (e.g., when | |||||
/// generating OpenMP code). The basic blocks that are created in this | |||||
/// sub-function are then still part of the dominator tree of the original | |||||
/// function, such that the dominator tree reaches over function boundaries. | |||||
/// This is not only incorrect, but also causes crashes. This function now | |||||
/// removes from the dominator tree all basic blocks that are dominated (and | |||||
Not Done ReplyInline ActionsI stoped reviewing here, will continue tonight. jdoerfert: I stoped reviewing here, will continue tonight. | |||||
/// consequently reachable) from the entry block of this (sub)function. | |||||
/// | |||||
/// FIXME: A LLVM (function or region) pass should not touch anything outside of | |||||
/// the function/region it runs on. Hence, the pure need for this function shows | |||||
/// that we do not comply to this rule. At the moment, this does not cause any | |||||
/// issues, but we should be aware that such issues may appear. Unfortunately | |||||
/// the current LLVM pass infrastructure does not allow to make Polly a module | |||||
/// or call-graph pass to solve this issue, as such a pass would not have access | |||||
/// to the per-function analyses passes needed by Polly. A future pass manager | |||||
/// infrastructure is supposed to enable such kind of access possibly allowing | |||||
/// us to create a cleaner solution here. | |||||
Not Done ReplyInline Actionsremove the newline jdoerfert: remove the newline | |||||
/// | |||||
/// FIXME: Instead of adding the dominance information and then dropping it | |||||
/// later on, we should try to just not add it in the first place. This requires | |||||
Not Done ReplyInline ActionsI would call it It and ItId but that's not important. jdoerfert: I would call it It and ItId but that's not important. | |||||
/// some careful testing to make sure this does not break in interaction with | |||||
Not Done ReplyInline ActionsWhat about a flag to the loopgenerator such that it will not add these blocks to the DT at all (or just omit the DT argument)? jdoerfert: What about a flag to the loopgenerator such that it will not add these blocks to the DT at all… | |||||
Not Done ReplyInline ActionsInteresting idea. This requires some work to understand if this is possible. I added a FIXME. For now I propose to use the same solution as we had in the CLooG backend. grosser: Interesting idea. This requires some work to understand if this is possible. I added a FIXME. | |||||
/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or | |||||
/// which may try to update it. | |||||
/// | |||||
/// @param F The function which contains the BBs to removed. | |||||
/// @param DT The dominator tree from which to remove the BBs. | |||||
static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) { | |||||
DomTreeNode *N = DT.getNode(&F->getEntryBlock()); | |||||
std::vector<BasicBlock *> Nodes; | |||||
// We can only remove an element from the dominator tree, if all its children | |||||
// have been removed. To ensure this we obtain the list of nodes to remove | |||||
// using a post-order tree traversal. | |||||
for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) | |||||
Nodes.push_back(I->getBlock()); | |||||
for (BasicBlock *BB : Nodes) | |||||
DT.eraseNode(BB); | |||||
} | |||||
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { | |||||
isl_ast_node *Body; | |||||
isl_ast_expr *Init, *Inc, *Iterator, *UB; | |||||
isl_id *IteratorID; | |||||
Value *ValueLB, *ValueUB, *ValueInc; | |||||
Type *MaxType; | |||||
Value *IV; | |||||
CmpInst::Predicate Predicate; | |||||
Body = isl_ast_node_for_get_body(For); | |||||
Init = isl_ast_node_for_get_init(For); | |||||
Inc = isl_ast_node_for_get_inc(For); | |||||
Iterator = isl_ast_node_for_get_iterator(For); | |||||
IteratorID = isl_ast_expr_get_id(Iterator); | |||||
UB = getUpperBound(For, Predicate); | |||||
ValueLB = ExprBuilder.create(Init); | |||||
ValueUB = ExprBuilder.create(UB); | |||||
ValueInc = ExprBuilder.create(Inc); | |||||
// OpenMP always uses SLE. In case the isl generated AST uses a SLT | |||||
// expression, we need to adjust the loop blound by one. | |||||
if (Predicate == CmpInst::ICMP_SLT) | |||||
ValueUB = Builder.CreateAdd( | |||||
ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); | |||||
Not Done ReplyInline ActionsI still do not know why we need this, shouldn't we just create the domtree on the fly (when we create the subfunc) correctly? jdoerfert: I still do not know why we need this, shouldn't we just create the domtree on the fly (when we… | |||||
MaxType = ExprBuilder.getType(Iterator); | |||||
MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); | |||||
MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); | |||||
MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); | |||||
if (MaxType != ValueLB->getType()) | |||||
ValueLB = Builder.CreateSExt(ValueLB, MaxType); | |||||
if (MaxType != ValueUB->getType()) | |||||
ValueUB = Builder.CreateSExt(ValueUB, MaxType); | |||||
if (MaxType != ValueInc->getType()) | |||||
ValueInc = Builder.CreateSExt(ValueInc, MaxType); | |||||
BasicBlock::iterator LoopBody; | |||||
SetVector<Value *> SubtreeValues; | |||||
Not Done ReplyInline Actions... generate code for SCEVs... simbuerg: ... generate code for SCEVs... | |||||
Not Done ReplyInline ActionsFixed. grosser: Fixed. | |||||
SetVector<const Loop *> Loops; | |||||
getReferencesInSubtree(For, SubtreeValues, Loops); | |||||
// Create for all loops we depend on values that contain the current loop | |||||
// iteration. These values are necessary to generate code for SCEVs that | |||||
// depend on such loops. As a result we need to pass them to the subfunction. | |||||
for (const Loop *L : Loops) { | |||||
const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), | |||||
SE.getUnknown(Builder.getInt64(1)), | |||||
L, SCEV::FlagAnyWrap); | |||||
Value *V = generateSCEV(OuterLIV); | |||||
OutsideLoopIterations[L] = SE.getUnknown(V); | |||||
SubtreeValues.insert(V); | |||||
} | |||||
ParallelLoopGenerator::ValueToValueMapTy NewValues; | |||||
ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL); | |||||
IV = ParallelLoopGen.createParallelLoop(ValueLB, ValueUB, ValueInc, | |||||
SubtreeValues, NewValues, &LoopBody); | |||||
BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); | |||||
Builder.SetInsertPoint(LoopBody); | |||||
// Save the current values. | |||||
ValueMapT ValueMapCopy = ValueMap; | |||||
IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; | |||||
updateValues(NewValues); | |||||
IDToValue[IteratorID] = IV; | |||||
create(Body); | |||||
// Restore the original values. | |||||
ValueMap = ValueMapCopy; | |||||
IDToValue = IDToValueCopy; | |||||
Builder.SetInsertPoint(AfterLoop); | |||||
removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT); | |||||
for (const Loop *L : Loops) | |||||
OutsideLoopIterations.erase(L); | |||||
isl_ast_node_free(For); | |||||
isl_ast_expr_free(Iterator); | |||||
isl_id_free(IteratorID); | |||||
} | |||||
void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { | void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { | ||||
bool Vector = PollyVectorizerChoice != VECTORIZER_NONE; | bool Vector = PollyVectorizerChoice != VECTORIZER_NONE; | ||||
if (Vector && IslAstInfo::isInnermostParallel(For) && | if (Vector && IslAstInfo::isInnermostParallel(For) && | ||||
!IslAstInfo::isReductionParallel(For)) { | !IslAstInfo::isReductionParallel(For)) { | ||||
int VectorWidth = getNumberOfIterations(For); | int VectorWidth = getNumberOfIterations(For); | ||||
if (1 < VectorWidth && VectorWidth <= 16) { | if (1 < VectorWidth && VectorWidth <= 16) { | ||||
createForVector(For, VectorWidth); | createForVector(For, VectorWidth); | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
if (IslAstInfo::isExecutedInParallel(For)) { | |||||
Not Done ReplyInline ActionsYou already mentioned --enable-polly-openmp but I would like to rename that option when we are at it, e.g., --polly-enable-parallel jdoerfert: You already mentioned --enable-polly-openmp but I would like to rename that option when we are… | |||||
createForParallel(For); | |||||
return; | |||||
} | |||||
createForSequential(For); | createForSequential(For); | ||||
} | } | ||||
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { | void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { | ||||
isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); | isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); | ||||
Function *F = Builder.GetInsertBlock()->getParent(); | Function *F = Builder.GetInsertBlock()->getParent(); | ||||
LLVMContext &Context = F->getContext(); | LLVMContext &Context = F->getContext(); | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { | ||||
// result will always fit into the type of the original induction variable | // result will always fit into the type of the original induction variable | ||||
// (because we calculate a value of the original induction variable). | // (because we calculate a value of the original induction variable). | ||||
const Value *OldIV = Stmt->getInductionVariableForDimension(i); | const Value *OldIV = Stmt->getInductionVariableForDimension(i); | ||||
if (OldIV) { | if (OldIV) { | ||||
V = Builder.CreateIntCast(V, OldIV->getType(), true); | V = Builder.CreateIntCast(V, OldIV->getType(), true); | ||||
VMap[OldIV] = V; | VMap[OldIV] = V; | ||||
} | } | ||||
} | } | ||||
Not Done ReplyInline ActionsThis needs some docu, I don't know why we need this here? Whats in which map? jdoerfert: This needs some docu, I don't know why we need this here? Whats in which map? | |||||
// Add the current ValueMap to our per-statement value map. | |||||
// | |||||
// This is needed e.g. to rewrite array base addresses when moving code | |||||
// into a parallely executed subfunction. | |||||
VMap.insert(ValueMap.begin(), ValueMap.end()); | |||||
isl_ast_expr_free(Expr); | isl_ast_expr_free(Expr); | ||||
} | } | ||||
void IslNodeBuilder::createSubstitutionsVector( | void IslNodeBuilder::createSubstitutionsVector( | ||||
__isl_take isl_ast_expr *Expr, ScopStmt *Stmt, VectorValueMapT &VMap, | __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, VectorValueMapT &VMap, | ||||
std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, | std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, | ||||
__isl_take isl_id *IteratorID) { | __isl_take isl_id *IteratorID) { | ||||
int i = 0; | int i = 0; | ||||
Show All 16 Lines | void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { | ||||
isl_id *Id; | isl_id *Id; | ||||
ScopStmt *Stmt; | ScopStmt *Stmt; | ||||
isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); | isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); | ||||
isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); | isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); | ||||
Id = isl_ast_expr_get_id(StmtExpr); | Id = isl_ast_expr_get_id(StmtExpr); | ||||
isl_ast_expr_free(StmtExpr); | isl_ast_expr_free(StmtExpr); | ||||
LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); | |||||
Stmt = (ScopStmt *)isl_id_get_user(Id); | Stmt = (ScopStmt *)isl_id_get_user(Id); | ||||
createSubstitutions(Expr, Stmt, VMap, LTS); | createSubstitutions(Expr, Stmt, VMap, LTS); | ||||
BlockGenerator::generate(Builder, *Stmt, VMap, LTS, P, LI, SE, | BlockGenerator::generate(Builder, *Stmt, VMap, LTS, P, LI, SE, | ||||
IslAstInfo::getBuild(User), &ExprBuilder); | IslAstInfo::getBuild(User), &ExprBuilder); | ||||
isl_ast_node_free(User); | isl_ast_node_free(User); | ||||
isl_id_free(Id); | isl_id_free(Id); | ||||
Show All 36 Lines | for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) { | ||||
isl_id *Id; | isl_id *Id; | ||||
Id = isl_set_get_dim_id(Context, isl_dim_param, i); | Id = isl_set_get_dim_id(Context, isl_dim_param, i); | ||||
IDToValue[Id] = generateSCEV((const SCEV *)isl_id_get_user(Id)); | IDToValue[Id] = generateSCEV((const SCEV *)isl_id_get_user(Id)); | ||||
isl_id_free(Id); | isl_id_free(Id); | ||||
} | } | ||||
// Generate values for the current loop iteration for all surrounding loops. | |||||
// | |||||
// We may also reference loops outside of the scop which do not contain the | |||||
// scop itself, but as the number of such scops may be arbitrarily large we do | |||||
// not generate code for them here, but only at the point of code generation | |||||
Not Done ReplyInline ActionsI don't understand the comment? e.g., What do you mean by ahead of time? When do we generate the references then? jdoerfert: I don't understand the comment? e.g., What do you mean by ahead of time? When do we generate… | |||||
Not Done ReplyInline ActionsChanged to: + We may also reference loops outside of the scop which do not contain the grosser: Changed to:
+ // We may also reference loops outside of the scop which do not contain the
+… | |||||
// where these values are needed. | |||||
Region &R = S.getRegion(); | |||||
Loop *L = LI.getLoopFor(R.getEntry()); | |||||
while (L != nullptr && R.contains(L)) | |||||
L = L->getParentLoop(); | |||||
while (L != nullptr) { | |||||
const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), | |||||
SE.getUnknown(Builder.getInt64(1)), | |||||
L, SCEV::FlagAnyWrap); | |||||
Value *V = generateSCEV(OuterLIV); | |||||
OutsideLoopIterations[L] = SE.getUnknown(V); | |||||
L = L->getParentLoop(); | |||||
} | |||||
isl_set_free(Context); | isl_set_free(Context); | ||||
} | } | ||||
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { | Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { | ||||
Instruction *InsertLocation = --(Builder.GetInsertBlock()->end()); | Instruction *InsertLocation = --(Builder.GetInsertBlock()->end()); | ||||
return Rewriter->expandCodeFor(Expr, cast<IntegerType>(Expr->getType()), | return Rewriter->expandCodeFor(Expr, cast<IntegerType>(Expr->getType()), | ||||
InsertLocation); | InsertLocation); | ||||
} | } | ||||
namespace { | namespace { | ||||
class IslCodeGeneration : public ScopPass { | class IslCodeGeneration : public ScopPass { | ||||
public: | public: | ||||
static char ID; | static char ID; | ||||
IslCodeGeneration() : ScopPass(ID) {} | IslCodeGeneration() : ScopPass(ID) {} | ||||
/// @brief The datalayout used | |||||
const DataLayout *DL; | |||||
/// @name The analysis passes we need to generate code. | /// @name The analysis passes we need to generate code. | ||||
/// | /// | ||||
///{ | ///{ | ||||
LoopInfo *LI; | LoopInfo *LI; | ||||
IslAstInfo *AI; | IslAstInfo *AI; | ||||
DominatorTree *DT; | DominatorTree *DT; | ||||
ScalarEvolution *SE; | ScalarEvolution *SE; | ||||
///} | ///} | ||||
Show All 15 Lines | Value *buildRTC(PollyIRBuilder &Builder, IslExprBuilder &ExprBuilder) { | ||||
return RTC; | return RTC; | ||||
} | } | ||||
bool runOnScop(Scop &S) { | bool runOnScop(Scop &S) { | ||||
LI = &getAnalysis<LoopInfo>(); | LI = &getAnalysis<LoopInfo>(); | ||||
AI = &getAnalysis<IslAstInfo>(); | AI = &getAnalysis<IslAstInfo>(); | ||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | ||||
SE = &getAnalysis<ScalarEvolution>(); | SE = &getAnalysis<ScalarEvolution>(); | ||||
DL = &getAnalysis<DataLayoutPass>().getDataLayout(); | |||||
assert(!S.getRegion().isTopLevelRegion() && | assert(!S.getRegion().isTopLevelRegion() && | ||||
"Top level regions are not supported"); | "Top level regions are not supported"); | ||||
// Build the alias scopes for annotations first. | // Build the alias scopes for annotations first. | ||||
if (PollyAnnotateAliasScopes) | if (PollyAnnotateAliasScopes) | ||||
Annotator.buildAliasScopes(S); | Annotator.buildAliasScopes(S); | ||||
BasicBlock *EnteringBB = simplifyRegion(&S, this); | BasicBlock *EnteringBB = simplifyRegion(&S, this); | ||||
PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); | PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); | ||||
IslNodeBuilder NodeBuilder(Builder, Annotator, this, *LI, *SE, *DT); | IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); | ||||
NodeBuilder.addParameters(S.getContext()); | NodeBuilder.addParameters(S.getContext()); | ||||
Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); | Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); | ||||
BasicBlock *StartBlock = executeScopConditionally(S, this, RTC); | BasicBlock *StartBlock = executeScopConditionally(S, this, RTC); | ||||
Builder.SetInsertPoint(StartBlock->begin()); | Builder.SetInsertPoint(StartBlock->begin()); | ||||
NodeBuilder.create(AI->getAst()); | NodeBuilder.create(AI->getAst()); | ||||
return true; | return true; | ||||
} | } | ||||
virtual void printScop(raw_ostream &OS) const {} | virtual void printScop(raw_ostream &OS) const {} | ||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const { | virtual void getAnalysisUsage(AnalysisUsage &AU) const { | ||||
AU.addRequired<DataLayoutPass>(); | |||||
Not Done ReplyInline ActionsWhere is that one used? Was it missing before? jdoerfert: Where is that one used? Was it missing before? | |||||
AU.addRequired<DominatorTreeWrapperPass>(); | AU.addRequired<DominatorTreeWrapperPass>(); | ||||
AU.addRequired<IslAstInfo>(); | AU.addRequired<IslAstInfo>(); | ||||
AU.addRequired<RegionInfoPass>(); | AU.addRequired<RegionInfoPass>(); | ||||
AU.addRequired<ScalarEvolution>(); | AU.addRequired<ScalarEvolution>(); | ||||
AU.addRequired<ScopDetection>(); | AU.addRequired<ScopDetection>(); | ||||
AU.addRequired<ScopInfo>(); | AU.addRequired<ScopInfo>(); | ||||
AU.addRequired<LoopInfo>(); | AU.addRequired<LoopInfo>(); | ||||
Show All 32 Lines |
out-of-scop