Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -327,6 +327,9 @@
   /// normal scalar array modeling.
   bool isPHIKind() const { return Kind == MK_PHI; };
 
+  /// @brief Is this array info modeling an array?
+  bool isArrayKind() const { return Kind == MK_Array; };
+
   /// @brief Dump a readable representation to stderr.
   void dump() const;
 
@@ -2030,6 +2033,12 @@
   /// @brief The ScalarEvolution to help building Scop.
   ScalarEvolution *SE;
 
+  /// @brief Set of instructions that might read any memory location.
+  SmallVector<CallInst *, 16> GlobalReads;
+
+  /// @brief Set of all accessed array base pointers.
+  SmallSetVector<Value *, 16> ArrayBasePointers;
+
   // The Scop
   std::unique_ptr<Scop> scop;
 
@@ -2084,6 +2093,19 @@
                                const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
                                const InvariantLoadsSetTy &ScopRIL);
 
+  /// @brief Try to build a MemoryAccess for a call instruction.
+  ///
+  /// @param Inst       The call instruction that accesses the memory.
+  /// @param L          The parent loop of the instruction.
+  /// @param R          The region on which to build the data access dictionary.
+  /// @param BoxedLoops The set of loops that are overapproximated in @p R.
+  /// @param ScopRIL    The required invariant loads equivalence classes.
+  ///
+  /// @returns True if the access could be built, False otherwise.
+  bool buildAccessCallInst(MemAccInst Inst, Loop *L, Region *R,
+                           const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
+                           const InvariantLoadsSetTy &ScopRIL);
+
   /// @brief Build a single-dimensional parameteric sized MemoryAccess
   ///        from the Load/Store instruction.
   ///
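Note on the two new ScopInfo members: a call that may read any memory location
(FMRB_OnlyReadsMemory below) cannot be modeled while the SCoP's arrays are
still being discovered, so such calls are recorded in GlobalReads and resolved
only after every accessed base pointer has been collected in
ArrayBasePointers. What follows is a minimal sketch of that deferral pattern
in plain C++; the Builder type and its members are illustrative stand-ins, not
Polly's actual interfaces.

    #include <set>
    #include <string>
    #include <vector>

    struct Access { std::string Call, BasePtr; };

    struct Builder {
      std::vector<std::string> GlobalReads;    // calls that may read anything
      std::set<std::string> ArrayBasePointers; // grows while accesses are built
      std::vector<Access> Accesses;

      void noteGlobalRead(const std::string &Call) { GlobalReads.push_back(Call); }
      void noteArray(const std::string &BP) { ArrayBasePointers.insert(BP); }

      // Runs once all arrays are known: a global read may touch every array.
      void finalize() {
        for (auto &Call : GlobalReads)
          for (auto &BP : ArrayBasePointers)
            Accesses.push_back({Call, BP});
      }
    };

For the mod_ref_read_pointers.ll test at the end of this patch (two arrays,
one readonly call), finalize() would add one may-read access per array.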
Index: include/polly/Support/ScopHelper.h
===================================================================
--- include/polly/Support/ScopHelper.h
+++ include/polly/Support/ScopHelper.h
@@ -51,10 +51,10 @@
 /// referenced object and should be passed by-value as it is small enough.
 ///
 /// This proxy can either represent a LoadInst instance, a StoreInst instance,
-/// a MemIntrinsic instance (memset, memmove, memcpy) or a nullptr (only
-/// creatable using the default constructor); never an Instruction that is
-/// neither of the above mentioned. When representing a nullptr, only the
-/// following methods are defined:
+/// a MemIntrinsic instance (memset, memmove, memcpy), a CallInst instance or a
+/// nullptr (only creatable using the default constructor); never an
+/// Instruction that is neither of the above mentioned. When representing a
+/// nullptr, only the following methods are defined:
 /// isNull(), isInstruction(), isLoad(), isStore(), ..., isMemTransferInst(),
 /// operator bool(), operator!()
 ///
@@ -75,16 +75,17 @@
   /* implicit */ MemAccInst(llvm::StoreInst &SI) : I(&SI) {}
   /* implicit */ MemAccInst(llvm::StoreInst *SI) : I(SI) {}
   /* implicit */ MemAccInst(llvm::MemIntrinsic *MI) : I(MI) {}
+  /* implicit */ MemAccInst(llvm::CallInst *CI) : I(CI) {}
   explicit MemAccInst(llvm::Instruction &I) : I(&I) { assert(isa(I)); }
   explicit MemAccInst(llvm::Instruction *I) : I(I) { assert(isa(I)); }
 
   static bool isa(const llvm::Value &V) {
     return llvm::isa<llvm::LoadInst>(V) || llvm::isa<llvm::StoreInst>(V) ||
-           llvm::isa<llvm::MemIntrinsic>(V);
+           llvm::isa<llvm::MemIntrinsic>(V) || llvm::isa<llvm::CallInst>(V);
   }
   static bool isa(const llvm::Value *V) {
     return llvm::isa<llvm::LoadInst>(V) || llvm::isa<llvm::StoreInst>(V) ||
-           llvm::isa<llvm::MemIntrinsic>(V);
+           llvm::isa<llvm::MemIntrinsic>(V) || llvm::isa<llvm::CallInst>(V);
   }
   static MemAccInst cast(llvm::Value &V) {
     return MemAccInst(llvm::cast<llvm::Instruction>(V));
@@ -140,6 +141,14 @@
     I = MI;
     return *this;
   }
+  MemAccInst &operator=(llvm::CallInst &CI) {
+    I = &CI;
+    return *this;
+  }
+  MemAccInst &operator=(llvm::CallInst *CI) {
+    I = CI;
+    return *this;
+  }
 
   operator llvm::Instruction *() const { return asInstruction(); }
   explicit operator bool() const { return isInstruction(); }
@@ -168,6 +177,8 @@
       return asStore()->getValueOperand();
     if (isMemIntrinsic())
       return nullptr;
+    if (isCallInst())
+      return nullptr;
     llvm_unreachable("Operation not supported on nullptr");
   }
   llvm::Value *getPointerOperand() const {
@@ -177,6 +188,8 @@
       return asStore()->getPointerOperand();
     if (isMemIntrinsic())
       return asMemIntrinsic()->getDest();
+    if (isCallInst())
+      return nullptr;
     llvm_unreachable("Operation not supported on nullptr");
   }
 
@@ -187,6 +200,8 @@
       return asStore()->getAlignment();
     if (isMemIntrinsic())
       return asMemIntrinsic()->getAlignment();
+    if (isCallInst())
+      return 0;
     llvm_unreachable("Operation not supported on nullptr");
   }
   bool isVolatile() const {
@@ -196,6 +211,8 @@
       return asStore()->isVolatile();
     if (isMemIntrinsic())
       return asMemIntrinsic()->isVolatile();
+    if (isCallInst())
+      return false;
     llvm_unreachable("Operation not supported on nullptr");
   }
   bool isSimple() const {
@@ -205,6 +222,8 @@
       return asStore()->isSimple();
     if (isMemIntrinsic())
       return !asMemIntrinsic()->isVolatile();
+    if (isCallInst())
+      return true;
     llvm_unreachable("Operation not supported on nullptr");
   }
   llvm::AtomicOrdering getOrdering() const {
@@ -214,6 +233,8 @@
       return asStore()->getOrdering();
     if (isMemIntrinsic())
       return llvm::AtomicOrdering::NotAtomic;
+    if (isCallInst())
+      return llvm::AtomicOrdering::NotAtomic;
     llvm_unreachable("Operation not supported on nullptr");
   }
   bool isUnordered() const {
@@ -224,6 +245,8 @@
     // Copied from the Load/Store implementation of isUnordered:
     if (isMemIntrinsic())
       return !asMemIntrinsic()->isVolatile();
+    if (isCallInst())
+      return true;
     llvm_unreachable("Operation not supported on nullptr");
   }
 
@@ -231,6 +254,7 @@
   bool isInstruction() const { return I; }
   bool isLoad() const { return I && llvm::isa<llvm::LoadInst>(I); }
   bool isStore() const { return I && llvm::isa<llvm::StoreInst>(I); }
+  bool isCallInst() const { return I && llvm::isa<llvm::CallInst>(I); }
   bool isMemIntrinsic() const { return I && llvm::isa<llvm::MemIntrinsic>(I); }
   bool isMemSetInst() const { return I && llvm::isa<llvm::MemSetInst>(I); }
   bool isMemTransferInst() const {
@@ -240,6 +264,7 @@
   llvm::Instruction *asInstruction() const { return I; }
   llvm::LoadInst *asLoad() const { return llvm::cast<llvm::LoadInst>(I); }
   llvm::StoreInst *asStore() const { return llvm::cast<llvm::StoreInst>(I); }
+  llvm::CallInst *asCallInst() const { return llvm::cast<llvm::CallInst>(I); }
   llvm::MemIntrinsic *asMemIntrinsic() const {
     return llvm::cast<llvm::MemIntrinsic>(I);
   }
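Because MemAccInst can now wrap an arbitrary CallInst, several accessors
return neutral defaults (nullptr, 0, NotAtomic) for calls instead of hitting
llvm_unreachable. In particular, getPointerOperand() may now return null, so
existing callers need a guard; the IRBuilder.cpp hunk below adds exactly this.
A minimal sketch of the pattern, assuming only the proxy class declared above:

    #include "polly/Support/ScopHelper.h"

    void inspect(polly::MemAccInst Inst) {
      if (!Inst)
        return; // default-constructed (null) proxy
      // Null for plain call instructions, which have no single
      // pointer operand.
      llvm::Value *Ptr = Inst.getPointerOperand();
      if (!Ptr)
        return;
      // ... analyze Ptr ...
    }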
Index: lib/Analysis/ScopDetection.cpp
===================================================================
--- lib/Analysis/ScopDetection.cpp
+++ lib/Analysis/ScopDetection.cpp
@@ -462,7 +462,8 @@
     return true;
 
   if (auto *II = dyn_cast<IntrinsicInst>(&CI))
-    return isValidIntrinsicInst(*II, Context);
+    if (isValidIntrinsicInst(*II, Context))
+      return true;
 
   Function *CalledFunction = CI.getCalledFunction();
 
@@ -470,6 +471,29 @@
   if (CalledFunction == 0)
     return false;
 
+  switch (AA->getModRefBehavior(CalledFunction)) {
+  case llvm::FMRB_UnknownModRefBehavior:
+    return false;
+  case llvm::FMRB_DoesNotAccessMemory:
+  case llvm::FMRB_OnlyReadsMemory:
+    return true;
+  case llvm::FMRB_OnlyReadsArgumentPointees:
+  case llvm::FMRB_OnlyAccessesArgumentPointees:
+    for (const auto &Arg : CI.arg_operands()) {
+      if (!Arg->getType()->isPointerTy())
+        continue;
+
+      // Bail if a pointer argument has a base address not known to
+      // ScalarEvolution. Note that a zero pointer is acceptable.
+      auto *ArgSCEV = SE->getSCEVAtScope(Arg, LI->getLoopFor(CI.getParent()));
+      if (ArgSCEV->isZero())
+        continue;
+      if (!isa<SCEVUnknown>(SE->getPointerBase(ArgSCEV)))
+        return false;
+    }
+    return true;
+  }
+
   return false;
 }
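The switch above accepts a call whenever alias analysis can classify its
memory effects: functions that access no memory or only read it are always
fine, while argument-pointee readers and writers are accepted only when every
pointer argument has a base address ScalarEvolution can identify (a
SCEVUnknown), with null pointers tolerated. A hedged restatement of that rule
as a standalone predicate; the name and factoring are illustrative only, and
the pointer-argument walk is elided since it appears verbatim in the hunk:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Function.h"

    static bool hasModeledModRefBehavior(llvm::AliasAnalysis &AA,
                                         const llvm::Function *F) {
      switch (AA.getModRefBehavior(F)) {
      case llvm::FMRB_DoesNotAccessMemory:
      case llvm::FMRB_OnlyReadsMemory:
        return true; // no pointer arguments need to be inspected
      case llvm::FMRB_OnlyReadsArgumentPointees:
      case llvm::FMRB_OnlyAccessesArgumentPointees:
        return true; // accepted iff every pointer argument has a known base
      default:
        return false; // unknown behavior cannot be modeled
      }
    }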
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -3939,6 +3939,51 @@
   return true;
 }
 
+bool ScopInfo::buildAccessCallInst(
+    MemAccInst Inst, Loop *L, Region *R,
+    const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
+    const InvariantLoadsSetTy &ScopRIL) {
+  if (!Inst.isCallInst())
+    return false;
+
+  auto &CI = *Inst.asCallInst();
+  if (isIgnoredIntrinsic(&CI))
+    return true;
+
+  bool ReadOnly = false;
+  auto *AF = SE->getConstant(IntegerType::getInt64Ty(CI.getContext()), 0);
+  auto *CalledFunction = CI.getCalledFunction();
+  switch (AA->getModRefBehavior(CalledFunction)) {
+  case llvm::FMRB_UnknownModRefBehavior:
+    llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
+  case llvm::FMRB_DoesNotAccessMemory:
+    return true;
+  case llvm::FMRB_OnlyReadsMemory:
+    GlobalReads.push_back(&CI);
+    return true;
+  case llvm::FMRB_OnlyReadsArgumentPointees:
+    ReadOnly = true;
+  // Fall through
+  case llvm::FMRB_OnlyAccessesArgumentPointees:
+    auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
+    for (const auto &Arg : CI.arg_operands()) {
+      if (!Arg->getType()->isPointerTy())
+        continue;
+
+      auto *ArgSCEV = SE->getSCEVAtScope(Arg, L);
+      if (ArgSCEV->isZero())
+        continue;
+
+      auto *ArgBasePtr = cast<SCEVUnknown>(SE->getPointerBase(ArgSCEV));
+      addArrayAccess(Inst, AccType, ArgBasePtr->getValue(),
+                     ArgBasePtr->getType(), false, {AF}, {}, &CI);
+    }
+    return true;
+  }
+
+  return true;
+}
+
 void ScopInfo::buildAccessSingleDim(
     MemAccInst Inst, Loop *L, Region *R,
     const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
@@ -3990,6 +4035,9 @@
   if (buildAccessMemIntrinsic(Inst, L, R, BoxedLoops, ScopRIL))
     return;
 
+  if (buildAccessCallInst(Inst, L, R, BoxedLoops, ScopRIL))
+    return;
+
   if (buildAccessMultiDimFixed(Inst, L, R, BoxedLoops, ScopRIL))
     return;
 
@@ -4126,6 +4174,7 @@
                               bool IsAffine, ArrayRef<const SCEV *> Subscripts,
                               ArrayRef<const SCEV *> Sizes,
                               Value *AccessValue) {
+  ArrayBasePointers.insert(BaseAddress);
   addMemoryAccess(MemAccInst.getParent(), MemAccInst, AccType, BaseAddress,
                   ElementType, IsAffine, AccessValue, Subscripts, Sizes,
                   ScopArrayInfo::MK_Array);
@@ -4253,6 +4302,14 @@
   buildAccessFunctions(R, *R.getExit(), *SD->getInsnToMemAccMap(&R), nullptr,
                        /* IsExitBlock */ true);
 
+  // Create memory accesses for global reads since all arrays are now known.
+  auto *AF = SE->getConstant(IntegerType::getInt64Ty(SE->getContext()), 0);
+  for (auto *GlobalRead : GlobalReads)
+    for (auto *BP : ArrayBasePointers)
+      addArrayAccess(MemAccInst(GlobalRead), MemoryAccess::READ, BP,
+                     BP->getType()->getPointerElementType(), false, {AF}, {},
+                     GlobalRead);
+
   scop->init(*AA, AC, *SD, *DT, *LI);
 }
Index: lib/CodeGen/IRBuilder.cpp
===================================================================
--- lib/CodeGen/IRBuilder.cpp
+++ lib/CodeGen/IRBuilder.cpp
@@ -135,7 +135,11 @@
   if (!MemInst)
     return;
 
-  auto *PtrSCEV = SE->getSCEV(MemInst.getPointerOperand());
+  auto *Ptr = MemInst.getPointerOperand();
+  if (!Ptr)
+    return;
+
+  auto *PtrSCEV = SE->getSCEV(Ptr);
   auto *BaseSCEV = SE->getPointerBase(PtrSCEV);
   auto *SU = dyn_cast<SCEVUnknown>(BaseSCEV);
 
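Reading the tests below: an access relation that constrains the subscript,
such as { Stmt_for_body[i0] -> MemRef_A[i0] }, touches exactly one element per
statement instance, whereas the relations created for call arguments and
global reads leave the subscript unconstrained. For example,

    { Stmt_for_body[i0] -> MemRef_A[o0] }

says that iteration i0 may access every element o0 of A. This is the printed
form of the accesses built in buildAccessCallInst above: a single subscript of
constant zero ({AF}) passed with IsAffine = false, which ScopInfo
over-approximates to the whole array.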
Index: test/ScopDetect/mod_ref_read_pointer.ll
===================================================================
--- /dev/null
+++ test/ScopDetect/mod_ref_read_pointer.ll
@@ -0,0 +1,41 @@
+; RUN: opt %loadPolly -basicaa -polly-detect -analyze < %s | FileCheck %s
+;
+; CHECK: Valid Region for Scop: for.cond => for.end
+;
+;    #pragma readonly
+;    int func(int *A);
+;
+;    void jd(int *A) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i + 2] = func(A);
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i32 @func(i32* %A) #1
+
+define void @jd(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %call = call i32 @func(i32* %A)
+  %tmp = add nsw i64 %indvars.iv, 2
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %tmp
+  store i32 %call, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #1 = { nounwind readonly }
Index: test/ScopInfo/mod_ref_access_pointee_arguments.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/mod_ref_access_pointee_arguments.ll
@@ -0,0 +1,54 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen -disable-output < %s
+;
+; Verify that we model the may-write access of the prefetch intrinsic
+; correctly, i.e., that A is accessed by it but B is not.
+;
+; CHECK:      Stmt_for_body
+; CHECK:        Domain :=
+; CHECK:          { Stmt_for_body[i0] : 0 <= i0 <= 1023 };
+; CHECK:        Schedule :=
+; CHECK:          { Stmt_for_body[i0] -> [i0] };
+; CHECK:        MayWriteAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[o0] };
+; CHECK:        ReadAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_B[i0] };
+; CHECK:        MustWriteAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[i0] };
+;
+;    void jd(int *restrict A, int *restrict B) {
+;      for (int i = 0; i < 1024; i++) {
+;        @llvm.prefetch(A);
+;        A[i] = B[i];
+;      }
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32* noalias %A, i32* noalias %B) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %bc = bitcast i32* %arrayidx to i8*
+  call void @llvm.prefetch(i8* %bc, i32 1, i32 1, i32 1)
+  %tmp = load i32, i32* %arrayidx2
+  store i32 %tmp, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
Index: test/ScopInfo/mod_ref_read_pointee_arguments.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/mod_ref_read_pointee_arguments.ll
@@ -0,0 +1,63 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen -disable-output < %s
+;
+; Verify that we model the read access of the gcread intrinsic
+; correctly, i.e., that A is read by it but B is not.
+;
+; CHECK:      Stmt_for_body
+; CHECK:        Domain :=
+; CHECK:          { Stmt_for_body[i0] : 0 <= i0 <= 1023 };
+; CHECK:        Schedule :=
+; CHECK:          { Stmt_for_body[i0] -> [i0] };
+; CHECK:        ReadAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[o0] };
+; CHECK:        MustWriteAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_dummyloc[0] };
+; CHECK:        ReadAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_B[i0] };
+; CHECK:        MustWriteAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[i0] };
+;
+;    void jd(int *restrict A, int *restrict B) {
+;      char **dummyloc;
+;      for (int i = 0; i < 1024; i++) {
+;        char *dummy = @llvm.gcread(A, nullptr);
+;        *dummyloc = dummy;
+;        A[i] = B[i];
+;      }
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32* noalias %A, i32* noalias %B) gc "dummy" {
+entry:
+  %dummyloc = alloca i8*
+  br label %entry.split
+
+entry.split:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry.split
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry.split ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %bc = bitcast i32* %arrayidx to i8*
+  %dummy = call i8* @llvm.gcread(i8* %bc, i8** null)
+  store i8* %dummy, i8** %dummyloc, align 4
+  %tmp = load i32, i32* %arrayidx2
+  store i32 %tmp, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare i8* @llvm.gcread(i8*, i8**)
Index: test/ScopInfo/mod_ref_read_pointer.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/mod_ref_read_pointer.ll
@@ -0,0 +1,52 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen -disable-output < %s
+;
+; Check that the call to func is modeled as a read of the whole A array.
+;
+; CHECK:      Stmt_for_body
+; CHECK:        Domain :=
+; CHECK:          { Stmt_for_body[i0] : 0 <= i0 <= 1023 };
+; CHECK:        Schedule :=
+; CHECK:          { Stmt_for_body[i0] -> [i0] };
+; CHECK:        MustWriteAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[2 + i0] };
+; CHECK:        ReadAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[o0] };
+;
+;    #pragma readonly
+;    int func(int *A);
+;
+;    void jd(int *A) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i + 2] = func(A);
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %call = call i32 @func(i32* %A)
+  %tmp = add nsw i64 %indvars.iv, 2
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %tmp
+  store i32 %call, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare i32 @func(i32*) #1
+
+attributes #1 = { nounwind readonly }
Index: test/ScopInfo/mod_ref_read_pointers.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/mod_ref_read_pointers.ll
@@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen -disable-output < %s
+;
+; Check that the call to func will "read" not only the A array but also the
+; B array. The reason is the readonly annotation of func.
+;
+; CHECK:      Stmt_for_body
+; CHECK:        Domain :=
+; CHECK:          { Stmt_for_body[i0] : 0 <= i0 <= 1023 };
+; CHECK:        Schedule :=
+; CHECK:          { Stmt_for_body[i0] -> [i0] };
+; CHECK:        ReadAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_B[i0] };
+; CHECK:        MustWriteAccess := [Reduction Type: NONE]
+; CHECK:          { Stmt_for_body[i0] -> MemRef_A[2 + i0] };
+; CHECK-DAG:    ReadAccess := [Reduction Type: NONE]
+; CHECK-DAG:      { Stmt_for_body[i0] -> MemRef_B[o0] };
+; CHECK-DAG:    ReadAccess := [Reduction Type: NONE]
+; CHECK-DAG:      { Stmt_for_body[i0] -> MemRef_A[o0] };
+;
+;    #pragma readonly
+;    int func(int *A);
+;
+;    void jd(int *restrict A, int *restrict B) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i + 2] = func(A) + B[i];
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32* noalias %A, i32* noalias %B) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %call = call i32 @func(i32* %A)
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %tmp = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %call, %tmp
+  %tmp2 = add nsw i64 %indvars.iv, 2
+  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %tmp2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare i32 @func(i32*) #1
+
+attributes #1 = { nounwind readonly }