Index: lib/Analysis/ScopBuilder.cpp
===================================================================
--- lib/Analysis/ScopBuilder.cpp
+++ lib/Analysis/ScopBuilder.cpp
@@ -102,17 +102,18 @@
     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
     cl::init(false), cl::cat(PollyCategory));
 
-enum class GranularityChoice { BasicBlocks, Stores };
-
-static cl::opt<GranularityChoice>
-    StmtGranularity("polly-stmt-granularity",
-                    cl::desc("Select the statement granularity algorithm"),
-                    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
-                                          "Entire basic blocks granularity"),
-                               clEnumValN(GranularityChoice::Stores, "store",
-                                          "Store-level granularity")),
-                    cl::init(GranularityChoice::BasicBlocks),
-                    cl::cat(PollyCategory));
+enum class GranularityChoice { BasicBlocks, Stores, Instructions };
+
+static cl::opt<GranularityChoice> StmtGranularity(
+    "polly-stmt-granularity",
+    cl::desc("Select the statement granularity algorithm"),
+    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
+                          "Entire basic blocks granularity"),
+               clEnumValN(GranularityChoice::Stores, "store",
+                          "Store-level granularity"),
+               clEnumValN(GranularityChoice::Instructions, "instruction",
+                          "Instruction-level granularity")),
+    cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
 
 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                    Region *NonAffineSubRegion,
@@ -699,6 +700,7 @@
       if (shouldModelInst(&Inst, L))
         Instructions.push_back(&Inst);
       if (Inst.getMetadata("polly_split_after") ||
+          StmtGranularity == GranularityChoice::Instructions ||
           (StmtGranularity == GranularityChoice::Stores &&
            isa<StoreInst>(Inst))) {
         Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
@@ -737,6 +739,7 @@
       Count++;
     }
     if (Inst.getMetadata("polly_split_after") ||
+        StmtGranularity == GranularityChoice::Instructions ||
         (StmtGranularity == GranularityChoice::Stores && isa<StoreInst>(Inst)))
       Split = true;
Index: test/ScopInfo/zero_ext_of_truncate_3.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/zero_ext_of_truncate_3.ll
@@ -0,0 +1,54 @@
+; RUN: opt %loadPolly -polly-scops -analyze -polly-invariant-load-hoisting=true -polly-stmt-granularity=instruction < %s | FileCheck %s
+;
+; void f(unsigned long *restrict I, unsigned *restrict A, unsigned N) {
+;   for (unsigned i = 0; i < N; i++) {
+;     unsigned V = *I;
+;     if (V < i)
+;       A[i]++;
+;   }
+; }
+;
+; CHECK:      Assumed Context:
+; CHECK-NEXT: [N, tmp] -> { : }
+; CHECK-NEXT: Invalid Context:
+; CHECK-NEXT: [N, tmp] -> { : N > 0 and (tmp < 0 or tmp >= 2147483648) }
+;
+; CHECK:      Domain :=
+; CHECK-NEXT:   [N, tmp] -> { Stmt_if_then[i0] : tmp >= 0 and tmp < i0 < N };
+;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i64* noalias %I, i32* noalias %A, i32 %N, i32 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i64, i64* %I, align 8
+  %conv = trunc i64 %tmp to i32
+  %tmp1 = zext i32 %conv to i64
+  %cmp1 = icmp ult i64 %tmp1, %indvars.iv
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %inc = add i32 %tmp2, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/Simplify/redundant_region_scalar_1.ll
===================================================================
--- /dev/null
+++ test/Simplify/redundant_region_scalar_1.ll
@@ -0,0 +1,53 @@
+; RUN: opt %loadPolly -polly-simplify -polly-stmt-granularity=instruction -analyze < %s | FileCheck %s -match-full-lines
+;
+; Remove redundant store (a store that writes the same value already
+; at the destination) in a region.
+;
+define void @redundant_region_scalar_1(i32 %n, double* noalias nonnull %A) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+
+  bodyA:
+    %val1 = load double, double* %A
+    br label %region_entry
+
+  region_entry:
+    %val2 = load double, double* %A
+    %cmp = fcmp oeq double %val1, 0.0
+    br i1 %cmp, label %region_true, label %region_exit
+
+  region_true:
+    br label %region_exit
+
+  region_exit:
+    br label %bodyB
+
+  bodyB:
+    store double %val2, double* %A
+    br label %inc
+
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK:     Redundant writes removed: 3
+; CHECK: }
+
+; CHECK: After accesses {
+; CHECK-NEXT: }