Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -471,6 +471,9 @@ /// @brief Align the parameters in the statement to the scop context void realignParams(); + /// @brief Add the memory access @p MA of instruction @p I to this statement. + void addMemoryAccess(MemoryAccess *MA, Instruction *I); + /// @brief Print the ScopStmt. /// /// @param OS The output stream the ScopStmt is printed to. @@ -705,6 +708,13 @@ /// @brief Align the parameters in the statement to the scop context void realignParams(); + /// @brief Add all global accesses of the SCoP to the statements. + /// + /// Global accesses are instructions which have effects we cannot model + /// precicely but is purely on the memory level. For these instructions + /// we create overaproximated accesses to __all__ arrays in this SCoP. + void addGlobalAccesses(TempScop &TS); + /// @brief Print the static control part. /// /// @param OS The output stream the static control part is printed to. Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -148,6 +148,14 @@ // Access function of bbs. const AccFuncMapType &AccFuncMap; + /// @brief Set of intstructions which may access __all__ memory locations. + /// + /// Elements in this set have unknown access behaviours we have to over + /// approximate. To do so we pretend all memory locations known in the SCoP + /// are accessed by the instruction. The access kind read/write is stored too. + using GlobalAccessInstruction = std::pair; + SmallVector GlobalAccessInstructions; + friend class TempScopInfo; explicit TempScop(Region &r, LoopBoundMapType &loopBounds, @@ -203,6 +211,12 @@ } //@} + /// @brief Get all global access instructions for this TempScop. + const SmallVector & + getGlobalAccessInstructions() const { + return GlobalAccessInstructions; + } + /// @brief Print the Temporary Scop information. /// /// @param OS The output stream the access functions is printed to. @@ -300,9 +314,9 @@ /// /// @param CI The call instruction that might access the memory /// @param L The parent loop of the instruction - /// @param R The region on which we are going to build a TempScop + /// @param TS The TempScop we are building /// @param Accs The set of accesses in which we accumulate IRAccesses - void buildIRCallAccess(CallInst *CI, Loop *L, Region *R, + void buildIRCallAccess(CallInst *CI, Loop *L, TempScop &TS, AccFuncSetType &Accs); /// @brief Analyze and extract the cross-BB scalar dependences (or, @@ -315,7 +329,7 @@ /// Access is required. bool buildScalarDependences(Instruction *Inst, Region *R); - void buildAccessFunctions(Region &RefRegion, BasicBlock &BB); + void buildAccessFunctions(Region &RefRegion, BasicBlock &BB, TempScop &TS); void buildLoopBounds(TempScop &Scop); Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -360,6 +360,30 @@ } } + switch (AA->getModRefBehavior(CalledFunction)) { + case AliasAnalysis::DoesNotAccessMemory: + case AliasAnalysis::OnlyReadsMemory: + return true; + case AliasAnalysis::OnlyReadsArgumentPointees: + case AliasAnalysis::OnlyAccessesArgumentPointees: + for (const auto &ArgUse : CI.arg_operands()) { + Value *Arg = ArgUse.get(); + if (!Arg->getType()->isPointerTy()) + continue; + + AF = SE->getSCEVAtScope(Arg, L); + // Bail if a pointer argument has a base address not known to + // ScalarEvolution. + if (!isa(SE->getPointerBase(AF))) + return false; + } + // TODO: Allow these again once we can write a test case. + // return true; + break; + default: + break; + } + return false; } Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -443,7 +443,7 @@ // Non affine accesses are either real memory accesses with access functions // we can not analyze or special instructions (e.g., memset intrinsics) which - // write and/or read memory. The former will be overaproximatied (e.g., we + // write and/or read memory. The former will be overaproximated (e.g., we // model the access to the whole array) while the later comes with bounds. // Since the overaproximated accesses might not write all array cells they // are marked as MAY_WRITE. @@ -700,18 +700,19 @@ void ScopStmt::buildAccesses(TempScop &tempScop, const Region &CurRegion) { for (auto &&Access : *tempScop.getAccessFunctions(BB)) { - MemAccs.push_back(new MemoryAccess(Access.first, Access.second, this)); - - // We do not track locations for scalar memory accesses at the moment. - if (!Access.first.isScalar()) { - MemoryAccess *&MA = InstructionToAccess[Access.second]; - if (MA) - MemAccs.back()->setNextMA(MA); - MA = MemAccs.back(); - } + MemoryAccess *MA = new MemoryAccess(Access.first, Access.second, this); + addMemoryAccess(MA, Access.second); } } +void ScopStmt::addMemoryAccess(MemoryAccess *MA, Instruction *I) { + MemAccs.push_back(MA); + MemoryAccess *&MARef = InstructionToAccess[I]; + if (MARef) + MA->setNextMA(MARef); + MARef = MA; +} + void ScopStmt::realignParams() { for (MemoryAccess *MA : *this) MA->realignParams(); @@ -1131,6 +1132,60 @@ Stmt->realignParams(); } +void Scop::addGlobalAccesses(TempScop &TS) { + // Determine if we have read/write/both or no global accesses. + bool HasRead = false, HasWrite = false; + for (auto &GlobalAccessPair : TS.getGlobalAccessInstructions()) { + if (GlobalAccessPair.second == IRAccess::READ) + HasRead = true; + else + HasWrite = true; + } + + // Bail if there are no global accesses in this SCoP. + if (!HasRead && !HasWrite) + return; + + SmallVector Subscripts, Sizes; + SmallVector ReadAccesses, WriteAccesses; + DenseMap StmtMap; + SmallPtrSet BaseAddresses; + + const SCEV *ZeroSCEV = SE->getConstant(Type::getInt64Ty(SE->getContext()), 0); + + // Create read/write IRAccesses for all base addresses in the SCoP. + for (ScopStmt *Stmt : *this) { + StmtMap[Stmt->getBasicBlock()] = Stmt; + for (MemoryAccess *MA : *Stmt) { + Value *BaseAddr = MA->getBaseAddr(); + if (!BaseAddresses.insert(BaseAddr)) + continue; + + if (HasRead) + ReadAccesses.push_back(IRAccess(IRAccess::READ, BaseAddr, ZeroSCEV, 1, + /* isAffine? */ false, Subscripts, + Sizes)); + if (HasWrite) + WriteAccesses.push_back( + IRAccess(IRAccess::MAY_WRITE, BaseAddr, ZeroSCEV, 1, + /* isAffine? */ false, Subscripts, Sizes)); + } + } + + // For each global access create read/write accesses based on the IRAccesses. + for (auto &GlobalAccessPair : TS.getGlobalAccessInstructions()) { + Instruction *I = GlobalAccessPair.first; + BasicBlock *BB = I->getParent(); + ScopStmt *Stmt = StmtMap[BB]; + if (GlobalAccessPair.second == IRAccess::READ) + for (const IRAccess &Acc : ReadAccesses) + Stmt->addMemoryAccess(new MemoryAccess(Acc, I, Stmt), I); + else + for (const IRAccess &Acc : WriteAccesses) + Stmt->addMemoryAccess(new MemoryAccess(Acc, I, Stmt), I); + } +} + void Scop::simplifyAssumedContext() { // The parameter constraints of the iteration domains give us a set of // constraints that need to hold for all cases where at least a single @@ -1184,6 +1239,11 @@ addParameterBounds(); simplifyAssumedContext(); + // After we created all SCoP statements we also created all "real" memory + // accesses in the SCoP. Now we can generate the overaproximated ones which + // may access "the whole memory", thus all arrays contained in the SCoP. + addGlobalAccesses(tempScop); + assert(NestLoops.empty() && "NestLoops not empty at top level!"); } Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -189,7 +189,7 @@ Subscripts, Sizes); } -void TempScopInfo::buildIRCallAccess(CallInst *CI, Loop *L, Region *R, +void TempScopInfo::buildIRCallAccess(CallInst *CI, Loop *L, TempScop &TS, AccFuncSetType &Accs) { // Trivial calls can be ignored. if (CI->doesNotAccessMemory()) @@ -270,10 +270,58 @@ } } + bool OnlyReadsPointees = false; + Function *CalledFunction = CI->getCalledFunction(); + switch (AA->getModRefBehavior(CalledFunction)) { + case AliasAnalysis::DoesNotAccessMemory: + return; + case AliasAnalysis::OnlyReadsArgumentPointees: + OnlyReadsPointees = true; + // Fall through + case AliasAnalysis::OnlyAccessesArgumentPointees: + for (const auto &ArgUse : CI->arg_operands()) { + Value *Arg = ArgUse.get(); + if (!Arg->getType()->isPointerTy()) + continue; + + ReadAF = SE->getSCEVAtScope(Arg, L); + ReadBasePtr = dyn_cast(SE->getPointerBase(ReadAF)); + assert(ReadBasePtr && "Could not find base pointer"); + ReadAF = SE->getMinusSCEV(ReadAF, ReadBasePtr); + RSize = + TD->getTypeStoreSize(ReadBasePtr->getType()->getPointerElementType()); + Accs.push_back(std::make_pair( + IRAccess(IRAccess::READ, ReadBasePtr->getValue(), ReadAF, RSize, + false, Subscripts, Sizes, ReadAF), + CI)); + + if (OnlyReadsPointees) + continue; + + WriteAF = SE->getSCEVAtScope(Arg, L); + WriteBasePtr = dyn_cast(SE->getPointerBase(WriteAF)); + assert(WriteBasePtr && "Could not find base pointer"); + WriteAF = SE->getMinusSCEV(WriteAF, WriteBasePtr); + WSize = TD->getTypeStoreSize( + WriteBasePtr->getType()->getPointerElementType()); + Accs.push_back(std::make_pair( + IRAccess(IRAccess::MAY_WRITE, WriteBasePtr->getValue(), WriteAF, + WSize, false, Subscripts, Sizes, WriteAF), + CI)); + } + return; + case AliasAnalysis::OnlyReadsMemory: + TS.GlobalAccessInstructions.push_back(std::make_pair(CI, IRAccess::READ)); + return; + default: + break; + } + llvm_unreachable("Unknown call instruction, cannot model the effect!"); } -void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB) { +void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, + TempScop &TS) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); @@ -285,7 +333,7 @@ // Some call accesses and intrinsics might access the memory. If so we // build over approximated IRAccesses for them. if (CallInst *CI = dyn_cast(Inst)) - buildIRCallAccess(CI, L, &R, Functions); + buildIRCallAccess(CI, L, TS, Functions); if (!isa(Inst) && buildScalarDependences(Inst, &R)) { // If the Instruction is used outside the statement, we need to build the @@ -415,7 +463,7 @@ TempScop *TScop = new TempScop(R, LoopBounds, BBConds, AccFuncMap); for (const auto &BB : R.blocks()) { - buildAccessFunctions(R, *BB); + buildAccessFunctions(R, *BB, *TScop); buildCondition(BB, R.getEntry()); } Index: test/ScopDetect/mod_ref_read_pointer.ll =================================================================== --- /dev/null +++ test/ScopDetect/mod_ref_read_pointer.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -basicaa -polly-detect -analyze < %s | FileCheck %s +; +; CHECK: Valid Region for Scop: for.cond => for.end +; +; #pragma readonly +; int func(int *A); +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; A[i + 2] = func(A); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare i32 @func(i32* %A) #1 + +define void @jd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %call = call i32 @func(i32* %A) + %tmp = add nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %tmp + store i32 %call, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +attributes #1 = { nounwind readonly } Index: test/ScopInfo/mod_ref_read_pointer.ll =================================================================== --- /dev/null +++ test/ScopInfo/mod_ref_read_pointer.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; Check that we assume the call to func has a read on the whole A array. +; TODO: Use range analysis metadata! +; +; CHECK: Stmt_for_body +; CHECK: Domain := +; CHECK: { Stmt_for_body[i0] : i0 >= 0 and i0 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_for_body[i0] -> scattering[0, i0, 0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[2 + i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[o0] }; +; +; #pragma readonly +; int func(int *A); +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; A[i + 2] = func(A); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %call = call i32 @func(i32* %A) #2 + %tmp = add nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %tmp + store i32 %call, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare i32 @func(i32*) #1 + +attributes #1 = { nounwind readonly } Index: test/ScopInfo/mod_ref_read_pointers.ll =================================================================== --- /dev/null +++ test/ScopInfo/mod_ref_read_pointers.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; Check that the call to func will "read" not only the A array but also the +; B array. The reason is the readonly annotation of func. +; +; CHECK: Stmt_for_body +; CHECK: Domain := +; CHECK: { Stmt_for_body[i0] : i0 >= 0 and i0 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_for_body[i0] -> scattering[0, i0, 0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body[i0] -> MemRef_B[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[2 + i0] }; +; CHECK-DAG: ReadAccess := [Reduction Type: NONE] +; CHECK-DAG: { Stmt_for_body[i0] -> MemRef_B[o0] }; +; CHECK-DAG: ReadAccess := [Reduction Type: NONE] +; CHECK-DAG: { Stmt_for_body[i0] -> MemRef_A[o0] }; +; +; #pragma readonly +; int func(int *A); +; +; void jd(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i + 2] = func(A) + B[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* noalias %A, i32* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %call = call i32 @func(i32* %A) #2 + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %call, %tmp + %tmp2 = add nsw i64 %indvars.iv, 2 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %tmp2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare i32 @func(i32*) #1 + +attributes #1 = { nounwind readonly }