Skip to content

Commit

Permalink
Identify and hoist definitively invariant loads
Browse files Browse the repository at this point in the history
  As a first step in the direction of assumed invariant loads (loads
  that are not written in some context) we now detect and hoist
  definitively invariant loads. These invariant loads will be preloaded
  in the code generation and used in the optimized version of the SCoP.
  If the load is only conditionally executed the preloaded version will
  also only be executed under the same condition, hence we will never
  access memory that wouldn't have been accessed otherwise. This is also
  the main feature that distinguishes this approach from LICM.

  As hoisting can make statements empty we will simplify the SCoP and
  remove empty statements that would otherwise cause artifacts in the
  code generation.

Differential Revision: http://reviews.llvm.org/D13194

llvm-svn: 248861
Johannes Doerfert committed Sep 29, 2015
1 parent f6343d7 commit c1db67e
Showing 23 changed files with 534 additions and 155 deletions.
18 changes: 18 additions & 0 deletions polly/include/polly/CodeGen/IslNodeBuilder.h
Original file line number Diff line number Diff line change
@@ -42,6 +42,9 @@ class IslNodeBuilder {
void addParameters(__isl_take isl_set *Context);
void create(__isl_take isl_ast_node *Node);

/// @brief Preload all memory loads that are invariant.
void preloadInvariantLoads();

/// @brief Finalize code generation for the SCoP @p S.
///
/// @see BlockGenerator::finalizeSCoP(Scop &S)
@@ -190,6 +193,21 @@ class IslNodeBuilder {
/// @param Marker The node we generate code for.
virtual void createMark(__isl_take isl_ast_node *Marker);
virtual void createFor(__isl_take isl_ast_node *For);

/// @brief Preload the memory load access @p MA.
///
/// If @p MA is not always executed it will be conditionally loaded and
/// merged with a null value of the same type. Hence, if @p MA is executed
/// only under condition C then the preload code will look like this:
///
/// MA_preload = null;
/// if (C)
/// MA_preload = load MA;
/// use MA_preload
Value *preloadInvariantLoad(const MemoryAccess &MA,
__isl_take isl_set *Domain,
__isl_keep isl_ast_build *Build);

void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
void createForSequential(__isl_take isl_ast_node *For);

44 changes: 40 additions & 4 deletions polly/include/polly/ScopInfo.h
Original file line number Diff line number Diff line change
@@ -127,6 +127,9 @@ class ScopArrayInfo {
/// @brief Destructor to free the isl id of the base pointer.
~ScopArrayInfo();

/// @brief Set the base pointer to @p BP.
void setBasePtr(Value *BP) { BasePtr = BP; }

/// @brief Return the base pointer.
Value *getBasePtr() const { return BasePtr; }

@@ -690,6 +693,15 @@ class MemoryAccess {
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
MemoryAccess::ReductionType RT);

/// @brief Ordered list type to hold accesses.
using MemoryAccessList = std::forward_list<MemoryAccess *>;

/// @brief Type for invariant memory accesses and their domain context.
using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;

/// @brief Type for multiple invariant memory accesses and their domain context.
using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>;

///===----------------------------------------------------------------------===//
/// @brief Statement of the Scop
///
@@ -700,9 +712,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
/// At the moment every statement represents a single basic block of LLVM-IR.
class ScopStmt {
public:
/// @brief List to hold all (scalar) memory accesses mapped to an instruction.
using MemoryAccessList = std::forward_list<MemoryAccess *>;

ScopStmt(const ScopStmt &) = delete;
const ScopStmt &operator=(const ScopStmt &) = delete;

@@ -880,6 +889,9 @@ class ScopStmt {
/// @brief Return true if this statement represents a whole region.
bool isRegionStmt() const { return R != nullptr; }

/// @brief Return true if this statement does not contain any accesses.
bool isEmpty() const { return MemAccs.empty(); }

/// @brief Return the (scalar) memory accesses for @p Inst.
const MemoryAccessList &getAccessesFor(const Instruction *Inst) const {
MemoryAccessList *MAL = lookupAccessesFor(Inst);
@@ -913,6 +925,13 @@ class ScopStmt {
BB = Block;
}

/// @brief Move the memory access in @p InvMAs to @p TargetList.
///
/// Note that scalar accesses that are caused by any access in @p InvMAs will
/// be eliminated too.
void hoistMemoryAccesses(MemoryAccessList &InvMAs,
InvariantAccessesTy &TargetList);

typedef MemoryAccessVec::iterator iterator;
typedef MemoryAccessVec::const_iterator const_iterator;

@@ -1023,7 +1042,7 @@ class Scop {
/// Max loop depth.
unsigned MaxLoopDepth;

typedef std::deque<ScopStmt> StmtSet;
typedef std::list<ScopStmt> StmtSet;
/// The statements in this Scop.
StmtSet Stmts;

@@ -1130,6 +1149,9 @@ class Scop {
/// group to ensure the SCoP is executed in an alias free environment.
MinMaxVectorPairVectorTy MinMaxAliasGroups;

/// @brief List of invariant accesses.
InvariantAccessesTy InvariantAccesses;

/// @brief Scop constructor; invoked from ScopInfo::buildScop.
Scop(Region &R, AccFuncMapType &AccFuncMap, ScalarEvolution &SE,
DominatorTree &DT, isl_ctx *ctx, unsigned MaxLoopDepth);
@@ -1183,6 +1205,15 @@ class Scop {
/// @brief Add parameter constraints to @p C that imply a non-empty domain.
__isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const;

/// @brief Simplify the SCoP representation
///
/// At the moment we perform the following simplifications:
/// - removal of empty statements (due to invariant load hoisting)
void simplifySCoP();

/// @brief Hoist all invariant memory loads.
void hoistInvariantLoads();

/// @brief Build the Context of the Scop.
void buildContext();

@@ -1313,6 +1344,11 @@ class Scop {
/// @return The maximum depth of the loop.
inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; }

/// @brief Return the set of invariant accesses.
///
/// Each entry pairs a hoisted memory access with the isl_set describing the
/// context under which it is executed. The isl_set objects stay owned by this
/// Scop (they are freed in the destructor), so callers have to copy a set
/// before handing it to a function that takes ownership of it.
const InvariantAccessesTy &getInvariantAccesses() const {
return InvariantAccesses;
}

/// @brief Mark the SCoP as optimized by the scheduler.
void markAsOptimized() { IsOptimized = true; }

127 changes: 127 additions & 0 deletions polly/lib/Analysis/ScopInfo.cpp
Original file line number Diff line number Diff line change
@@ -1350,6 +1350,46 @@ void ScopStmt::print(raw_ostream &OS) const {

void ScopStmt::dump() const { print(dbgs()); }

/// @brief Move the memory accesses in @p InvMAs to @p TargetList.
///
/// Every access in @p InvMAs, together with all accesses registered for the
/// same access instruction, is removed from this statement. Afterwards each
/// access in @p InvMAs is appended to @p TargetList, paired with its own copy
/// of the parameter context under which this statement is executed.
///
/// @param InvMAs     The invariant accesses to remove from this statement.
/// @param TargetList The list the (access, execution context) pairs are
///                   appended to. The receiver owns the isl_set of each pair.
void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
                                   InvariantAccessesTy &TargetList) {

  // Remove all memory accesses in @p InvMAs from this statement together
  // with all scalar accesses that were caused by them. The tricky iteration
  // order used here is needed because MemAccs is a vector and the order in
  // which the accesses of each memory access list (MAL) are stored in this
  // vector is reversed.
  for (MemoryAccess *MA : InvMAs) {
    auto &MAL = *lookupAccessesFor(MA->getAccessInstruction());
    MAL.reverse();

    auto MALIt = MAL.begin();
    auto MALEnd = MAL.end();
    auto MemAccsIt = MemAccs.begin();
    while (MALIt != MALEnd) {
      // Search forward for the current MAL entry, but never walk past the end
      // of MemAccs.
      while (MemAccsIt != MemAccs.end() && *MemAccsIt != *MALIt)
        ++MemAccsIt;
      assert(MemAccsIt != MemAccs.end() &&
             "Access in the access list is not part of the statement");

      ++MALIt;
      // erase() invalidates the iterator passed to it; continue the search
      // from the iterator it returns (the element following the erased one).
      MemAccsIt = MemAccs.erase(MemAccsIt);
    }

    InstructionToAccess.erase(MA->getAccessInstruction());
    delete &MAL;
  }

  // Get the context under which this statement, hence the memory accesses, are
  // executed.
  isl_set *DomainCtx = isl_set_params(getDomain());
  DomainCtx = isl_set_remove_redundancies(DomainCtx);
  DomainCtx = isl_set_detect_equalities(DomainCtx);
  DomainCtx = isl_set_coalesce(DomainCtx);

  // Each target entry gets its own copy of the context; the local reference
  // is released below.
  for (MemoryAccess *MA : InvMAs)
    TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx)));

  isl_set_free(DomainCtx);
}

//===----------------------------------------------------------------------===//
/// Scop class implement

@@ -2268,6 +2308,9 @@ void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) {
buildBoundaryContext();
simplifyContexts();
buildAliasChecks(AA);

hoistInvariantLoads();
simplifySCoP();
}

Scop::~Scop() {
@@ -2290,6 +2333,9 @@ Scop::~Scop() {
isl_pw_multi_aff_free(MMA.second);
}
}

for (const auto &IA : InvariantAccesses)
isl_set_free(IA.second);
}

void Scop::updateAccessDimensionality() {
@@ -2298,6 +2344,81 @@ void Scop::updateAccessDimensionality() {
Access->updateDimensionality();
}

void Scop::simplifySCoP() {

for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
ScopStmt &Stmt = *StmtIt;

if (!StmtIt->isEmpty()) {
StmtIt++;
continue;
}

if (Stmt.isRegionStmt())
for (BasicBlock *BB : Stmt.getRegion()->blocks())
StmtMap.erase(BB);
else
StmtMap.erase(Stmt.getBasicBlock());

StmtIt = Stmts.erase(StmtIt);
}
}

/// @brief Hoist all invariant memory loads.
///
/// A load is moved out of its statement if it is an explicit, affine read
/// whose access relation does not involve any loop iterator of the statement
/// and whose accessed location is provably not written anywhere in the SCoP.
/// If at least one load was hoisted the SCoP is marked as optimized.
void Scop::hoistInvariantLoads() {
  isl_union_map *Writes = getWrites();
  for (ScopStmt &Stmt : *this) {

    // TODO: Loads that are not loop carried, hence are in a statement with
    //       zero iterators, are by construction invariant, though we
    //       currently "hoist" them anyway.

    isl_set *Domain = Stmt.getDomain();
    MemoryAccessList InvMAs;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isImplicit() || MA->isWrite() || !MA->isAffine())
        continue;

      // A non-zero result covers both "involves an iterator" and the isl
      // error value, so failures are handled conservatively here.
      isl_map *AccessRelation = MA->getAccessRelation();
      if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0,
                                Stmt.getNumIterators())) {
        isl_map_free(AccessRelation);
        continue;
      }

      AccessRelation =
          isl_map_intersect_domain(AccessRelation, isl_set_copy(Domain));
      isl_set *AccessRange = isl_map_range(AccessRelation);

      isl_union_map *Written = isl_union_map_intersect_range(
          isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange));

      // isl answers with 1 (empty), 0 (not empty) or -1 (error). Only a
      // definite "empty" proves that the location is never written; negating
      // the result would silently treat an error as "not written" and hoist
      // a load that might be overwritten inside the SCoP.
      bool IsWritten = isl_union_map_is_empty(Written) != 1;
      isl_union_map_free(Written);

      if (IsWritten)
        continue;

      InvMAs.push_front(MA);
    }

    // We inserted invariant accesses always in the front but need them to be
    // sorted in a "natural order". The statements are already sorted in reverse
    // post order and that suffices for the accesses too. The reason we require
    // an order in the first place is the dependences between invariant loads
    // that can be caused by indirect loads.
    InvMAs.reverse();

    // Transfer the memory access from the statement to the SCoP.
    Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses);

    isl_set_free(Domain);
  }
  isl_union_map_free(Writes);

  if (!InvariantAccesses.empty())
    IsOptimized = true;
}

const ScopArrayInfo *
Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType,
ArrayRef<const SCEV *> Sizes, bool IsPHI) {
@@ -2478,6 +2599,12 @@ void Scop::print(raw_ostream &OS) const {
<< "\n";
OS.indent(4) << "Region: " << getNameStr() << "\n";
OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
OS.indent(4) << "Invariant Accesses: {\n";
for (const auto &IA : InvariantAccesses) {
IA.first->print(OS);
OS.indent(12) << "Execution Context: " << IA.second << "\n";
}
OS.indent(4) << "}\n";
printContext(OS.indent(4));
printArrayInfo(OS.indent(4));
printAliasAssumptions(OS);
11 changes: 11 additions & 0 deletions polly/lib/CodeGen/BlockGenerators.cpp
Original file line number Diff line number Diff line change
@@ -108,6 +108,8 @@ Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
return const_cast<Value *>(Old);

if (Value *New = GlobalMap.lookup(Old)) {
if (Value *NewRemapped = GlobalMap.lookup(New))
New = NewRemapped;
if (Old->getType()->getScalarSizeInBits() <
New->getType()->getScalarSizeInBits())
New = Builder.CreateTruncOrBitCast(New, Old->getType());
@@ -226,6 +228,9 @@ Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load,
ValueMapT &BBMap, LoopToScevMapT &LTS,
isl_id_to_ast_expr *NewAccesses) {
if (Value *PreloadLoad = GlobalMap.lookup(Load))
return PreloadLoad;

const Value *Pointer = Load->getPointerOperand();
Value *NewPointer =
generateLocationAccessed(Stmt, Load, Pointer, BBMap, LTS, NewAccesses);
@@ -762,6 +767,12 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad(
void VectorBlockGenerator::generateLoad(
ScopStmt &Stmt, const LoadInst *Load, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
Load->getName() + "_p");
return;
}

if (!VectorType::isValidElementType(Load->getType())) {
for (int i = 0; i < getVectorWidth(); i++)
ScalarMaps[i][Load] =
3 changes: 2 additions & 1 deletion polly/lib/CodeGen/CodeGeneration.cpp
Original file line number Diff line number Diff line change
@@ -146,8 +146,9 @@ class CodeGeneration : public ScopPass {
auto SplitBlock = StartBlock->getSinglePredecessor();
Builder.SetInsertPoint(SplitBlock->getTerminator());
NodeBuilder.addParameters(S.getContext());
NodeBuilder.preloadInvariantLoads();
Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder());
SplitBlock->getTerminator()->setOperand(0, RTC);
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
Builder.SetInsertPoint(StartBlock->begin());

NodeBuilder.create(AstRoot);
117 changes: 117 additions & 0 deletions polly/lib/CodeGen/IslNodeBuilder.cpp
Original file line number Diff line number Diff line change
@@ -814,6 +814,123 @@ void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
llvm_unreachable("Unknown isl_ast_node type");
}

/// @brief Create the actual preload memory access for @p MA.
///
/// The location is taken from the range of the access relation of @p MA,
/// simplified with regard to the context of @p S and translated through
/// @p Build into an access expression that is handed to @p ExprBuilder.
///
/// @return The value @p ExprBuilder created for the access expression.
static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA,
                                       isl_ast_build *Build,
                                       IslExprBuilder &ExprBuilder) {
  isl_pw_multi_aff *Location =
      isl_pw_multi_aff_from_set(isl_map_range(MA.getAccessRelation()));
  Location = isl_pw_multi_aff_gist_params(Location, S.getContext());

  return ExprBuilder.create(
      isl_ast_build_access_from_pw_multi_aff(Build, Location));
}

// Preload the memory access MA and return the preloaded value.
//
// Domain is the parameter context under which MA is executed; it is consumed
// on both paths (freed directly, or handed to isl_ast_build_expr_from_set).
// If Domain is the universe the load is emitted unconditionally at the
// current insert point. Otherwise the load is guarded by a branch on the
// domain condition and the result is a PHI that merges the loaded value with
// a null constant (not undef) of the access instruction's type.
Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
isl_set *Domain,
isl_ast_build *Build) {

// The access is always executed iff its context equals the universe set of
// the same space.
isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
isl_set_free(Universe);

if (AlwaysExecuted) {
isl_set_free(Domain);
return createPreloadLoad(S, MA, Build, ExprBuilder);
} else {

// Materialize the execution condition before the control flow is changed.
isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);

// The branch below needs an i1; any other integer is compared against zero.
Value *Cond = ExprBuilder.create(DomainCond);
if (!Cond->getType()->isIntegerTy(1))
Cond = Builder.CreateIsNotNull(Cond);

// Split twice at the insert point: CondBB will hold the conditional branch,
// MergeBB the PHI that joins both paths.
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
Builder.GetInsertPoint(), &DT, &LI);
CondBB->setName("polly.preload.cond");

BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), &DT, &LI);
MergeBB->setName("polly.preload.merge");

// ExecBB is the block that performs the load when the condition holds.
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);

// Register the new block in the dominator tree (below CondBB) and in the
// loop that contains CondBB, if there is one.
DT.addNewBlock(ExecBB, CondBB);
if (Loop *L = LI.getLoopFor(CondBB))
L->addBasicBlockToLoop(ExecBB, LI);

// Replace the terminator of CondBB by the conditional branch
// "Cond ? ExecBB : MergeBB".
auto *CondBBTerminator = CondBB->getTerminator();
Builder.SetInsertPoint(CondBBTerminator);
Builder.CreateCondBr(Cond, ExecBB, MergeBB);
CondBBTerminator->eraseFromParent();

// ExecBB falls through to MergeBB.
Builder.SetInsertPoint(ExecBB);
Builder.CreateBr(MergeBB);

// Emit the actual load in ExecBB, in front of its terminator.
Builder.SetInsertPoint(ExecBB->getTerminator());
Instruction *AccInst = MA.getAccessInstruction();
Type *AccInstTy = AccInst->getType();
Value *PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder);

// Merge the loaded value (from ExecBB) with a null constant (from CondBB).
// The builder is left positioned in MergeBB, in front of its terminator.
Builder.SetInsertPoint(MergeBB->getTerminator());
auto *MergePHI = Builder.CreatePHI(
AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
MergePHI->addIncoming(PreAccInst, ExecBB);
MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);

return MergePHI;
}
}

void IslNodeBuilder::preloadInvariantLoads() {

const auto &InvAccList = S.getInvariantAccesses();
if (InvAccList.empty())
return;

const Region &R = S.getRegion();

BasicBlock *PreLoadBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
PreLoadBB->setName("polly.preload.begin");
Builder.SetInsertPoint(PreLoadBB->begin());

isl_ast_build *Build =
isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));

for (const auto &IA : InvAccList) {
MemoryAccess *MA = IA.first;
assert(!MA->isImplicit());

isl_set *Domain = isl_set_copy(IA.second);
Instruction *AccInst = MA->getAccessInstruction();
Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build);
ValueMap[AccInst] = PreloadVal;

if (SE.isSCEVable(AccInst->getType())) {
isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
if (ParamId)
IDToValue[ParamId] = PreloadVal;
isl_id_free(ParamId);
}

SmallVector<Instruction *, 4> Users;
for (auto *U : AccInst->users())
if (Instruction *UI = dyn_cast<Instruction>(U))
if (!R.contains(UI))
Users.push_back(UI);
for (auto *U : Users)
U->replaceUsesOfWith(AccInst, PreloadVal);

auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
for (auto *DerivedSAI : SAI->getDerivedSAIs())
DerivedSAI->setBasePtr(PreloadVal);
}

isl_ast_build_free(Build);
}

void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {

for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) {
3 changes: 2 additions & 1 deletion polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@
; }
;
; CHECK: sext i32 %c to i64
; CHECK: sext i32 %c to i64
; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64
; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15
; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64
@@ -23,7 +24,7 @@
; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]]
; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]]
; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]]
; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %1, %[[NoAlias]]
; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %3, %[[NoAlias]]
; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
43 changes: 22 additions & 21 deletions polly/test/Isl/CodeGen/exprModDiv.ll
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
;
; void exprModDiv(float *A, float *B, float *C, long N, long p) {
; for (long i = 0; i < N; i++)
; C[i] += A[i] + B[i] + A[p] + B[p];
; C[i] += A[i] + B[i] + A[i] + B[i + p];
; }
;
;
@@ -32,21 +32,21 @@

; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 127 * floord(-p - 1, 127) + 127]
; CHECK: %20 = sub nsw i64 0, %p
; CHECK: %21 = sub nsw i64 %20, 1
; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127
; CHECK: %17 = sub nsw i64 0, %p
; CHECK: %18 = sub nsw i64 %17, 1
; CHECK: %pexp.fdiv_q.0 = sub i64 %18, 127
; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %18, 0
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %18
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127
; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %23 = add nsw i64 %p, %22
; CHECK: %24 = add nsw i64 %23, 127
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %20 = add nsw i64 %p, %19
; CHECK: %21 = add nsw i64 %20, 127
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21

; A[p / 127]
; CHECK: %pexp.div = sdiv exact i64 %p, 127
; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div

; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
@@ -58,17 +58,17 @@

; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
; POW2: %20 = sub nsw i64 0, %p
; POW2: %21 = sub nsw i64 %20, 1
; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7
; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %23 = add nsw i64 %p, %22
; POW2: %24 = add nsw i64 %23, 128
; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; POW2: %17 = sub nsw i64 0, %p
; POW2: %18 = sub nsw i64 %17, 1
; POW2: %polly.fdiv_q.shr = ashr i64 %18, 7
; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %20 = add nsw i64 %p, %19
; POW2: %21 = add nsw i64 %20, 128
; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21

; A[p / 128]
; POW2: %pexp.div = sdiv exact i64 %p, 128
; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@@ -87,10 +87,11 @@ for.body: ; preds = %for.cond
%arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
%tmp1 = load float, float* %arrayidx1, align 4
%add = fadd float %tmp, %tmp1
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %p
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp2 = load float, float* %arrayidx2, align 4
%add3 = fadd float %add, %tmp2
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %p
%padd = add nsw i64 %p, %i.0
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
%tmp3 = load float, float* %arrayidx4, align 4
%add5 = fadd float %add3, %tmp3
%arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0
39 changes: 39 additions & 0 deletions polly/test/Isl/CodeGen/invariant_load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0
; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B
;
; CHECK-LABEL: polly.stmt.bb2:
; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar
; CHECK-NEXT: store i32 %polly.access.B.load, i32* %scevgep, align 4
;
; void f(int *restrict A, int *restrict B) {
; for (int i = 0; i < 1024; i++)
; A[i] = *B;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32* noalias %A, i32* noalias %B) {
bb:
br label %bb1

bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5

bb2: ; preds = %bb1
%tmp = load i32, i32* %B, align 4
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp, i32* %tmp3, align 4
br label %bb4

bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1

bb5: ; preds = %bb1
ret void
}
8 changes: 6 additions & 2 deletions polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll
Original file line number Diff line number Diff line change
@@ -4,6 +4,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

%struct.wombat = type {[4 x i32]}

; CHECK: polly.preload.begin:
; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0
; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B
; CHECK-NOT: %polly.access.B.load = load i32, i32* %polly.access.B

; CHECK: polly.stmt.bb3.entry: ; preds = %polly.start
; CHECK: br label %polly.stmt.bb3

@@ -14,8 +19,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: br label %polly.stmt.bb13.exit

; CHECK: polly.stmt.bb5: ; preds = %polly.stmt.bb3
; CHECK: %tmp7_p_scalar_ = load i32, i32* %B, !alias.scope !0, !noalias !2
; CHECK: store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4
; CHECK: store i32 %polly.access.B.load, i32* %polly.access.cast.arg2
; CHECK: br label %polly.stmt.bb13.exit

; Function Attrs: nounwind uwtable
62 changes: 0 additions & 62 deletions polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll

This file was deleted.

14 changes: 4 additions & 10 deletions polly/test/Isl/CodeGen/simple_vec_call.ll
Original file line number Diff line number Diff line change
@@ -24,16 +24,10 @@ return:
ret void
}

; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]]
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0
; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1
; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2
24 changes: 9 additions & 15 deletions polly/test/Isl/CodeGen/simple_vec_call_2.ll
Original file line number Diff line number Diff line change
@@ -24,19 +24,13 @@ return:
ret void
}

; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]]
; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1
; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2
; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3
; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0
; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1
; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2
; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3
; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
; CHECK: attributes [[NUW]] = { nounwind }
10 changes: 6 additions & 4 deletions polly/test/Isl/CodeGen/simple_vec_cast.ll
Original file line number Diff line number Diff line change
@@ -28,8 +28,10 @@ bb4: ; preds = %bb1
ret void
}

; CHECK: %tmp_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8, !alias.scope !0, !noalias !2
; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double>
; CHECK: store <4 x double> %0, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4
; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)

; CHECK: polly.stmt.bb2: ; preds = %polly.start
; CHECK: %tmp_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0
; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double>
; CHECK: store <4 x double> %0, <4 x double>*
7 changes: 5 additions & 2 deletions polly/test/Isl/CodeGen/simple_vec_const.ll
Original file line number Diff line number Diff line change
@@ -52,5 +52,8 @@ define i32 @main() nounwind {
}


; CHECK: load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*)
; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer
; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)

; CHECK: polly.stmt.: ; preds = %polly.start
; CHECK: %_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0
; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
9 changes: 6 additions & 3 deletions polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll
Original file line number Diff line number Diff line change
@@ -22,6 +22,9 @@ body:
return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float**>, <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8
; CHECK: %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0)

; CHECK-NOT: load <1 x float**>
; CHECK: %value_p.splatinsert = insertelement <4 x float**> undef, float** %.load, i32 0
; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> undef, <4 x i32> zeroinitializer
; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8
1 change: 1 addition & 0 deletions polly/test/Isl/CodeGen/two-scops-in-row.ll
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ entry:

for.0:
%Scalar0.val = load i32, i32* %Scalar0
store i32 1, i32* %Scalar0
br i1 false, label %for.0, label %for.1.preheader

for.1.preheader:
10 changes: 5 additions & 5 deletions polly/test/ScopInfo/inter_bb_scalar_dep.ll
Original file line number Diff line number Diff line change
@@ -14,6 +14,10 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"

; Function Attrs: nounwind
; CHECK: Invariant
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_init_ptr[0]

define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
br label %for.i
@@ -25,11 +29,7 @@ for.i: ; preds = %for.i.end, %entry

entry.next: ; preds = %for.i
%init = load i64, i64* %init_ptr
; CHECK-LABEL: Stmt_entry_next
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
; CHECK-NOT: Stmt_entry_next
br label %for.j

for.j: ; preds = %for.j, %entry.next
19 changes: 9 additions & 10 deletions polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
Original file line number Diff line number Diff line change
@@ -14,7 +14,12 @@

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"

; Function Attrs: nounwind
; CHECK: Invariant Accesses: {
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_init_ptr[0]
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_init_ptr[0]
; CHECK: }
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
br label %for.i
@@ -26,23 +31,17 @@ for.i: ; preds = %for.i.end, %entry

entry.next: ; preds = %for.i
%init = load i64, i64* %init_ptr
; CHECK-LABEL: Stmt_entry_next
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
; CHECK-NOT: Stmt_entry_next
br label %for.j

for.j: ; preds = %for.j, %entry.next
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
%init_2 = load i64, i64* %init_ptr
%init_sum = add i64 %init, %init_2
; CHECK-LABEL: Stmt_for_j
; CHECK: Stmt_for_j
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] };
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] };
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_sum, i64* %scevgep
14 changes: 9 additions & 5 deletions polly/test/ScopInfo/intra_bb_scalar_dep.ll
Original file line number Diff line number Diff line change
@@ -14,6 +14,9 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"

; Function Attrs: nounwind
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] };
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
br label %for.i
@@ -32,11 +35,12 @@ for.j: ; preds = %for.j, %entry.next
%init_plus_two = add i64 %init, 2
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_plus_two, i64* %scevgep
; CHECK-LABEL: Stmt_for_j
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] };
; CHECK: Statements {
; CHECK-NEXT: Stmt_for_j
; CHECK-NOT: ReadAccess
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] };
; CHECK-NEXT: }
%indvar.j.next = add nsw i64 %indvar.j, 1
%exitcond.j = icmp eq i64 %indvar.j.next, %N
br i1 %exitcond.j, label %for.i.end, label %for.j
35 changes: 35 additions & 0 deletions polly/test/ScopInfo/invariant_load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] };
;
; void f(int *restrict A, int *restrict B) {
; for (int i = 0; i < 1024; i++)
; A[i] = *B;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; IR form of the C loop shown in the file header: every iteration reads the
; same location *B and writes it to A[i]. Both pointers are marked noalias.
; The header directives expect the load in %bb2 to be reported under
; "Invariant Accesses" as an access to MemRef_B[0].
define void @f(i32* noalias %A, i32* noalias %B) {
bb:
br label %bb1

; Loop header: %indvars.iv starts at 0 and the loop exits once it equals 1024.
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5

; Loop body: the load address %B does not depend on %indvars.iv, while the
; store address is A[%indvars.iv].
bb2: ; preds = %bb1
%tmp = load i32, i32* %B, align 4
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp, i32* %tmp3, align 4
br label %bb4

; Latch: advance the induction variable by one and branch back to the header.
bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1

; Loop exit.
bb5: ; preds = %bb1
ret void
}
52 changes: 52 additions & 0 deletions polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BP[0] };
; CHECK-NEXT: Execution Context: [N] -> { : N >= 514 }
;
; void f(int *BP, int *A, int N) {
; for (int i = 0; i < N; i++)
; if (i > 512)
; A[i] = *BP;
; else
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; IR form of the C loop shown in the file header: for i in [0, N), A[i] is set
; to *BP when i > 512 and to 0 otherwise. The load of *BP sits only on the
; i > 512 path, so it is reached only if some iteration has i >= 513, i.e.
; N >= 514, which is the "Execution Context" the header directives expect for
; the invariant access to MemRef_BP[0].
define void @f(i32* %BP, i32* %A, i32 %N) {
bb:
; Widen the 32-bit trip count to the 64-bit type of the induction variable.
%tmp = sext i32 %N to i64
br label %bb1

; Loop header: iterate while %indvars.iv < sext(%N) (signed comparison).
bb1: ; preds = %bb11, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ]
%tmp2 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp2, label %bb3, label %bb12

; Select the branch of the conditional: i > 512 goes to %bb5, else to %bb8.
bb3: ; preds = %bb1
%tmp4 = icmp sgt i64 %indvars.iv, 512
br i1 %tmp4, label %bb5, label %bb8

; Then-branch: load through %BP (address independent of %indvars.iv) and store
; the loaded value to A[i].
bb5: ; preds = %bb3
%tmp9a = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%inv = load i32, i32 *%BP
store i32 %inv, i32* %tmp9a, align 4
br label %bb10

; Else-branch: store the constant 0 to A[i]; %BP is not accessed here.
bb8: ; preds = %bb3
%tmp9b = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp9b, align 4
br label %bb10

; Join point of the two branches.
bb10: ; preds = %bb8, %bb5
br label %bb11

; Latch: advance the induction variable by one and branch back to the header.
bb11: ; preds = %bb10
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1

; Loop exit.
bb12: ; preds = %bb1
ret void
}
19 changes: 9 additions & 10 deletions polly/test/ScopInfo/tempscop-printing.ll
Original file line number Diff line number Diff line change
@@ -14,6 +14,10 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"

; CHECK-LABEL: Function: f
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_init_ptr[0]
; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 }

define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
br label %for.i
@@ -24,12 +28,8 @@ for.i:
br label %entry.next

entry.next:
; CHECK: Stmt_entry_next
; CHECK-NOT: Stmt_entry_next
%init = load i64, i64* %init_ptr
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
br label %for.j

for.j:
@@ -55,6 +55,9 @@ return:
}

; CHECK-LABEL: Function: g
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_init_ptr[0]
; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 }
define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
br label %for.i
@@ -65,12 +68,8 @@ for.i:
br label %entry.next

entry.next:
; CHECK: Stmt_entry_next
; CHECK-NOT: Stmt_entry_next
%init = load i64, i64* %init_ptr
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
br label %for.j

for.j:

0 comments on commit c1db67e

Please sign in to comment.