Index: include/polly/ScopBuilder.h
===================================================================
--- include/polly/ScopBuilder.h
+++ include/polly/ScopBuilder.h
@@ -233,7 +233,7 @@
   ///
   /// Consecutive instructions are associated to the same statement until a
   /// separator is found.
-  void buildSequentialBlockStmts(BasicBlock *BB);
+  void buildSequentialBlockStmts(BasicBlock *BB, bool Split = false);
 
   /// Create ScopStmt for all BBs and non-affine subregions of @p SR.
   ///
Index: lib/Analysis/ScopBuilder.cpp
===================================================================
--- lib/Analysis/ScopBuilder.cpp
+++ lib/Analysis/ScopBuilder.cpp
@@ -102,14 +102,16 @@
     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
     cl::init(false), cl::cat(PollyCategory));
 
-enum class GranularityChoice { BasicBlocks };
+enum class GranularityChoice { BasicBlocks, Stores };
 
 static cl::opt<GranularityChoice> StmtGranularity(
     "polly-stmt-granularity",
     cl::desc(
         "Algorithm to use for splitting basic blocks into multiple statements"),
     cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
-                          "One statement per basic block")),
+                          "One statement per basic block"),
+               clEnumValN(GranularityChoice::Stores, "store",
+                          "Store-level granularity")),
     cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
 
 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
@@ -683,7 +685,7 @@
          !canSynthesize(Inst, *scop, &SE, L);
 }
 
-void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB) {
+void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool Split) {
   Loop *SurroundingLoop = LI.getLoopFor(BB);
 
   int Count = 0;
@@ -691,7 +693,8 @@
   for (Instruction &Inst : *BB) {
     if (shouldModelInst(&Inst, SurroundingLoop))
       Instructions.push_back(&Inst);
-    if (Inst.getMetadata("polly_split_after")) {
+    if (Inst.getMetadata("polly_split_after") ||
+        (Split && isa<StoreInst>(Inst))) {
       scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
       Count++;
       Instructions.clear();
@@ -722,6 +725,9 @@
       case GranularityChoice::BasicBlocks:
         buildSequentialBlockStmts(BB);
         break;
+      case GranularityChoice::Stores:
+        buildSequentialBlockStmts(BB, true);
+        break;
       }
     }
 }
Index: test/Isl/CodeGen/non-affine-dominance-generated-entering-1.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/non-affine-dominance-generated-entering-1.ll
@@ -0,0 +1,44 @@
+; RUN: opt %loadPolly -polly-codegen -polly-stmt-granularity=store -S < %s | FileCheck %s
+;
+; llvm.org/PR25439
+; Scalar reloads in the generated entering block were not recognized as
+; dominating the subregion blocks when there were multiple entering nodes. This
+; resulted in values defined in there (here: %cond used in subregionB_entry) not
+; being copied. We check whether it is reusing the reloaded scalar.
+;
+; CHECK-LABEL: polly.stmt.subregionB_entry.exit:
+; CHECK: store i1 %polly.cond, i1* %cond.s2a
+;
+; CHECK-LABEL: polly.stmt.subregionB_entry.entry:
+; CHECK: %cond.s2a.reload = load i1, i1* %cond.s2a
+;
+; CHECK-LABEL: polly.stmt.subregionB_entry:
+; CHECK: br i1 %cond.s2a.reload
+
+define void @func(i32* %A) {
+entry:
+  br label %subregionA_entry
+
+subregionA_entry:
+  %cond = phi i1 [ false, %entry ], [ true, %subregionB_exit ]
+  br i1 %cond, label %subregionA_if, label %subregionA_else
+
+subregionA_if:
+  br label %subregionB_entry
+
+subregionA_else:
+  br label %subregionB_entry
+
+subregionB_entry:
+  store i32 0, i32* %A
+  br i1 %cond, label %subregionB_if, label %subregionB_exit
+
+subregionB_if:
+  br label %subregionB_exit
+
+subregionB_exit:
+  br i1 false, label %subregionA_entry, label %return
+
+return:
+  ret void
+}
Index: test/ScopInfo/invariant_load_zext_parameter-3.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/invariant_load_zext_parameter-3.ll
@@ -0,0 +1,112 @@
+; RUN: opt %loadPolly -scalar-evolution-max-value-compare-depth=3 -polly-scops -polly-invariant-load-hoisting=true -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -scalar-evolution-max-value-compare-depth=3 -polly-codegen -polly-invariant-load-hoisting=true -polly-stmt-granularity=store -analyze < %s
+;
+; Stress test for the code generation of invariant accesses.
+;
+;    void f(int *I0, int *I1, int *I2, int *V, long p0, long p1, long p2, long p3) {
+;      *V = *I1;
+;      for (int i = 0; i < 1000; i++) {
+;        long n0 = p0 * *I1 + p1 * *I1;
+;        V[i] = I0[n0];
+;        long m0 = p0 * (I2[0]);
+;        long m1 = p1 * (I2[1]);
+;        long m2 = p2 * (I2[2]);
+;        long m3 = p3 * (I2[3]);
+;        int j = 0;
+;        do {
+;          if (j > 0) {
+;            V[i] += I1[m0 + m2];
+;            V[i] += I1[n0];
+;          }
+;        } while (j++ < m1 + m3 * n0);
+;      }
+;    }
+;
+; CHECK: p0: ((sext i32 %tmp6 to i64) * %p1)
+; CHECK: p1: ((sext i32 %tmp3 to i64) * (sext i32 %tmp8 to i64) * (%p0 + %p1) * %p3)
+; CHECK: p2: ((sext i32 %tmp3 to i64) * (%p0 + %p1))
+; CHECK: p3: ((sext i32 %tmp5 to i64) * %p0)
+; CHECK: p4: ((sext i32 %tmp7 to i64) * %p2)
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %I0, i32* %I1, i32* %I2, i32* %V, i64 %p0, i64 %p1, i64 %p2, i64 %p3) {
+entry:
+  %tmp = load i32, i32* %I1, align 4
+  store i32 %tmp, i32* %V, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv1, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp3 = load i32, i32* %I1, align 4
+  %conv = sext i32 %tmp3 to i64
+  %mul = mul nsw i64 %conv, %p0
+  %conv1 = sext i32 %tmp3 to i64
+  %mul2 = mul nsw i64 %conv1, %p1
+  %add = add nsw i64 %mul, %mul2
+  %arrayidx = getelementptr inbounds i32, i32* %I0, i64 %add
+  %tmp4 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv1
+  store i32 %tmp4, i32* %arrayidx3, align 4
+  %tmp5 = load i32, i32* %I2, align 4
+  %conv5 = sext i32 %tmp5 to i64
+  %mul6 = mul nsw i64 %conv5, %p0
+  %arrayidx7 = getelementptr inbounds i32, i32* %I2, i64 1
+  %tmp6 = load i32, i32* %arrayidx7, align 4
+  %conv8 = sext i32 %tmp6 to i64
+  %mul9 = mul nsw i64 %conv8, %p1
+  %arrayidx10 = getelementptr inbounds i32, i32* %I2, i64 2
+  %tmp7 = load i32, i32* %arrayidx10, align 4
+  %conv11 = sext i32 %tmp7 to i64
+  %mul12 = mul nsw i64 %conv11, %p2
+  %arrayidx13 = getelementptr inbounds i32, i32* %I2, i64 3
+  %tmp8 = load i32, i32* %arrayidx13, align 4
+  %conv14 = sext i32 %tmp8 to i64
+  %mul15 = mul nsw i64 %conv14, %p3
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %for.body ]
+  %cmp16 = icmp sgt i64 %indvars.iv, 0
+  br i1 %cmp16, label %if.then, label %if.end
+
+if.then:                                          ; preds = %do.body
+  %add18 = add nsw i64 %mul6, %mul12
+  %arrayidx19 = getelementptr inbounds i32, i32* %I1, i64 %add18
+  %tmp9 = load i32, i32* %arrayidx19, align 4
+  %arrayidx21 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv1
+  %tmp10 = load i32, i32* %arrayidx21, align 4
+  %add22 = add nsw i32 %tmp10, %tmp9
+  store i32 %add22, i32* %arrayidx21, align 4
+  %arrayidx23 = getelementptr inbounds i32, i32* %I1, i64 %add
+  %tmp11 = load i32, i32* %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv1
+  %tmp12 = load i32, i32* %arrayidx25, align 4
+  %add26 = add nsw i32 %tmp12, %tmp11
+  store i32 %add26, i32* %arrayidx25, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %do.body
+  br label %do.cond
+
+do.cond:                                          ; preds = %if.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %mul28 = mul nsw i64 %mul15, %add
+  %add29 = add nsw i64 %mul9, %mul28
+  %cmp30 = icmp slt i64 %indvars.iv, %add29
+  br i1 %cmp30, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.cond
+  br label %for.inc
+
+for.inc:                                          ; preds = %do.end
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
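
Illustration (not part of the patch): with the new -polly-stmt-granularity=store mode, buildSequentialBlockStmts() ends the current ScopStmt after every StoreInst, in addition to the existing polly_split_after metadata separator. The sketch below shows the intended effect on a hypothetical input block; the function and value names are invented for this example, and no CHECK lines are given because the exact statement names depend on later ScopInfo processing.

; Sketch only (hypothetical input, not a test in this patch):
; with the default -polly-stmt-granularity=bb the body of %loop is modeled as a
; single ScopStmt; with -polly-stmt-granularity=store the statement is cut
; after each of the two stores, so they end up in two separate statements.
define void @sketch(i32* %A, i32* %B) {
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %A.idx = getelementptr inbounds i32, i32* %A, i64 %i
  %B.idx = getelementptr inbounds i32, i32* %B, i64 %i
  %val = load i32, i32* %B.idx
  store i32 %val, i32* %A.idx     ; first statement ends after this store
  %dbl = mul i32 %val, 2
  store i32 %dbl, i32* %B.idx     ; the remaining instructions form a new
                                  ; statement, which ends after this store
  %i.next = add nuw nsw i64 %i, 1
  %exitcond = icmp ne i64 %i.next, 1024
  br i1 %exitcond, label %loop, label %exit

exit:
  ret void
}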