Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -168,6 +168,16 @@ "Do not add parameter bounds and do no gist simplify sets accordingly"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +static cl::opt PollyAllowDereferenceOfAllFunctionParams( + "polly-allow-dereference-of-all-function-parameters", + cl::desc( + "Treat all parameters to functions that are pointers as dereferencible." + " This is useful for invariant load hoisting, since we can generate" + " less runtime checks. This is only valid if all pointers to functions" + " are always initialized, so that Polly can choose to hoist" + " their loads. "), + cl::Hidden, cl::init(false), cl::cat(PollyCategory)); + static cl::opt PollyPreciseFoldAccesses( "polly-precise-fold-accesses", cl::desc("Fold memory accesses to model more possible delinearizations " @@ -3827,11 +3837,24 @@ return nullptr; } +bool isAParameter(llvm::Value *maybeParam, const Function &F) { + for (const llvm::Argument &Arg : F.args()) + if (&Arg == maybeParam) + return true; + + return false; +}; + bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty, bool MAInvalidCtxIsEmpty, bool NonHoistableCtxIsEmpty) { LoadInst *LInst = cast(MA->getAccessInstruction()); + errs() << "Considering: " << *LInst << "\n"; const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout(); + if (PollyAllowDereferenceOfAllFunctionParams && + isAParameter(LInst->getPointerOperand(), getFunction())) + return true; + // TODO: We can provide more information for better but more expensive // results. if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(), @@ -3855,6 +3878,7 @@ for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++) if (!isa(MA->getSubscript(u))) return false; + return true; } Index: test/ScopInfo/allow-all-parameters-dereferencable.ll =================================================================== --- /dev/null +++ test/ScopInfo/allow-all-parameters-dereferencable.ll @@ -0,0 +1,98 @@ +; RUN: opt %loadPolly -analyze -polly-invariant-load-hoisting \ +; RUN: -polly-allow-dereference-of-all-function-parameters \ +; RUN: -polly-scops < %s | FileCheck %s --check-prefix=SCOP + +; RUN: opt %loadPolly -S -polly-invariant-load-hoisting \ +; RUN: -polly-codegen < %s | FileCheck %s --check-prefix=CODE-RTC + + +; RUN: opt %loadPolly -S -polly-invariant-load-hoisting \ +; RUN: -polly-allow-dereference-of-all-function-parameters \ +; RUN: -polly-codegen < %s | FileCheck %s --check-prefix=CODE + +; SCOP: Function: hoge +; SCOP-NEXT: Region: %bb15---%bb37 +; SCOP-NEXT: Max Loop Depth: 2 +; SCOP-NEXT: Invariant Accesses: { +; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; SCOP-NEXT: [tmp, tmp17, tmp28] -> { Stmt_bb29[i0] -> MemRef_arg1[0] }; +; SCOP-NEXT: Execution Context: [tmp, tmp17, tmp28] -> { : } +; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; SCOP-NEXT: [tmp, tmp17, tmp28] -> { Stmt_bb27[] -> MemRef_arg[0] }; +; SCOP-NEXT: Execution Context: [tmp, tmp17, tmp28] -> { : } +; SCOP-NEXT: } + +; Check that without the option `-pollt-allow-dereference-of-all-function-parameters` +; we do generate the runtime check. +; CODE-RTC: polly.preload.cond: ; preds = %polly.preload.begin +; CODE-RTC-NEXT: br i1 %{{[a-zA-Z0-9]*}}, label %polly.preload.exec, label %polly.preload.merge + +; Check that we don't generate a runtime check because we treat all +; parameters as dereferencable. +; CODE-NOT: polly.preload.cond: ; preds = %polly.preload.begin +; CODE-NOT: br i1 %{{r1:[a-zA-Z0-9]*}}, label %polly.preload.exec, label %polly.preload.merge + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@global = external global i32 + +; Function Attrs: nounwind uwtable +define void @hoge(i32* noalias %arg, i32* noalias %arg1, [0 x double]* noalias %arg2, float* %A) #0 { +bb: + %tmp = load i32, i32* @global, align 4 + %tmp3 = icmp sgt i32 %tmp, 1 + br label %bb14 + +bb14: ; preds = %bb + br label %bb15 + +bb15: ; preds = %bb14 + br i1 %tmp3, label %bb16, label %bb27 + +bb16: ; preds = %bb15 + %tmp17 = load i32, i32* %arg1, align 4 + br label %bb18 + +bb18: ; preds = %bb18, %bb16 + %tmp19 = phi i32 [ %tmp25, %bb18 ], [ 1, %bb16 ] + %tmp20 = sext i32 %tmp19 to i64 + %tmp21 = add nsw i64 %tmp20, -1 + %tmp22 = getelementptr [0 x double], [0 x double]* %arg2, i64 0, i64 %tmp21 + %tmp23 = bitcast double* %tmp22 to i64* + store i64 undef, i64* %tmp23, align 8 + %tmp24 = icmp eq i32 %tmp19, %tmp17 + %tmp25 = add i32 %tmp19, 1 + br i1 %tmp24, label %bb26, label %bb18 + +bb26: ; preds = %bb18 + br label %bb27 + +bb27: ; preds = %bb26, %bb15 + %tmp28 = load i32, i32* %arg, align 4 + store float 42.0, float* %A + br label %bb29 + +bb29: ; preds = %bb35, %bb27 + %tmp30 = load i32, i32* %arg1, align 4 + store float 42.0, float* %A + br label %bb31 + +bb31: ; preds = %bb31, %bb29 + %tmp32 = phi i32 [ undef, %bb31 ], [ 1, %bb29 ] + store float 42.0, float* %A + %tmp33 = icmp eq i32 %tmp32, %tmp30 + br i1 %tmp33, label %bb34, label %bb31 + +bb34: ; preds = %bb31 + br label %bb35 + +bb35: ; preds = %bb34 + %tmp36 = icmp eq i32 0, %tmp28 + br i1 %tmp36, label %bb37, label %bb29 + +bb37: ; preds = %bb35 + ret void +} + +attributes #0 = { nounwind uwtable }