Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -63,6 +63,13 @@ cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); +static int extractAffine(__isl_take isl_set *Set, __isl_take isl_aff *Aff, + void *User) { + *((isl_aff **)(User)) = Aff; + isl_set_free(Set); + return 0; +} + /// Translate a 'const SCEV *' expression in an isl_pw_aff. struct SCEVAffinator : public SCEVVisitor { public: @@ -263,7 +270,32 @@ } __isl_give isl_pw_aff *SCEVAffinator::visitUnknown(const SCEVUnknown *Expr) { - llvm_unreachable("Unknowns are always parameters"); + Value *Unknown = Expr->getValue(); + if (BinaryOperator *BO = dyn_cast(Unknown)) { + isl_pw_aff *RHS, *LHS; + isl_aff *RHSAff; + isl_val *RHSVal; + + assert(BO->getOpcode() == Instruction::SRem && + "Binary operator unknowns are always signed modulo expressions"); + LHS = visit(S->getSE()->getSCEV(BO->getOperand(0))); + RHS = visit(S->getSE()->getSCEV(BO->getOperand(1))); + assert(isl_pw_aff_is_cst(RHS) && + "Modulo expressions are only valid for a constant right hand side"); + assert(isl_pw_aff_n_piece(RHS) == 1 && + "Modulo expressions are only valid for a non split right hand side"); + isl_pw_aff_foreach_piece(RHS, extractAffine, &RHSAff); + assert(isl_aff_is_cst(RHSAff) && + "Modulo expressions are only valid for a constant right hand side"); + RHSVal = isl_aff_get_constant_val(RHSAff); + + isl_pw_aff_free(RHS); + isl_aff_free(RHSAff); + + return isl_pw_aff_mod_val(LHS, RHSVal); + } + + llvm_unreachable("Unknowns are always parameters or modulo expressions"); } int SCEVAffinator::getLoopDepth(const Loop *L) { Index: lib/Support/SCEVValidator.cpp =================================================================== --- lib/Support/SCEVValidator.cpp +++ lib/Support/SCEVValidator.cpp @@ -346,12 +346,27 @@ return ValidatorResult(SCEVType::INVALID); } - if (Instruction *I = dyn_cast(Expr->getValue())) + if (Instruction *I = dyn_cast(Expr->getValue())) { + if (I->getOpcode() == Instruction::SRem) { + + ValidatorResult Op0 = visit(SE.getSCEV(I->getOperand(0))); + if (!Op0.isValid()) + return ValidatorResult(SCEVType::INVALID); + + ValidatorResult Op1 = visit(SE.getSCEV(I->getOperand(1))); + if (!Op1.isValid() || !Op1.isConstant()) + return ValidatorResult(SCEVType::INVALID); + + Op0.merge(Op1); + return Op0; + } + if (R->contains(I)) { DEBUG(dbgs() << "INVALID: UnknownExpr references an instruction " "within the region\n"); return ValidatorResult(SCEVType::INVALID); } + } if (BaseAddress == V) { DEBUG(dbgs() << "INVALID: UnknownExpr references BaseAddress\n"); Index: test/DeadCodeElimination/non-affine-affine-mix.ll =================================================================== --- test/DeadCodeElimination/non-affine-affine-mix.ll +++ test/DeadCodeElimination/non-affine-affine-mix.ll @@ -2,9 +2,9 @@ ; ; void f(int *A) { ; for (int i = 0; i < 1024; i++) -; S1: A[i % 2] = i; +; S1: A[i ^ 2] = i; ; for (int i = 0; i < 1024; i++) -; S2: A[i2] = i; +; S2: A[i] = i; ; } ; We unfortunately do need to execute all iterations of S1, as we do not know @@ -16,45 +16,52 @@ ; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) ; CHECK: Stmt_S2(c1); -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @f(i32* %A) { entry: br label %for.cond -for.cond: +for.cond: ; preds = %for.inc, %entry %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %exitcond = icmp ne i32 %i.0, 1024 - br i1 %exitcond, label %S1, label %next + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end -S1: - %rem = srem i32 %i.0, 2 - %arrayidx = getelementptr inbounds i32* %A, i32 %rem +for.body: ; preds = %for.cond + br label %S1 + +S1: ; preds = %for.body + %xor = xor i32 %i.0, 2 + %idxprom = sext i32 %xor to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom store i32 %i.0, i32* %arrayidx, align 4 br label %for.inc -for.inc: +for.inc: ; preds = %S1 %inc = add nsw i32 %i.0, 1 br label %for.cond -next: - br label %for.cond.2 +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc7, %for.end + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc7 ], [ 0, %for.end ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body4, label %for.end9 -for.cond.2: - %i.2 = phi i32 [ 0, %next ], [ %inc.2, %for.inc.2 ] - %exitcond.2 = icmp ne i32 %i.2, 1024 - br i1 %exitcond.2, label %S2, label %for.end +for.body4: ; preds = %for.cond2 + br label %S2 -S2: - %arrayidx.2 = getelementptr inbounds i32* %A, i32 %i.2 - store i32 %i.2, i32* %arrayidx.2, align 4 - br label %for.inc.2 +S2: ; preds = %for.body4 + %arrayidx6 = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp = trunc i64 %indvars.iv to i32 + store i32 %tmp, i32* %arrayidx6, align 4 + br label %for.inc7 -for.inc.2: - %inc.2 = add nsw i32 %i.2, 1 - br label %for.cond.2 +for.inc7: ; preds = %S2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond2 -for.end: +for.end9: ; preds = %for.cond2 ret void } - Index: test/DeadCodeElimination/non-affine-but-modulo-and-affine-mix.ll =================================================================== --- test/DeadCodeElimination/non-affine-but-modulo-and-affine-mix.ll +++ test/DeadCodeElimination/non-affine-but-modulo-and-affine-mix.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-allow-nonaffine -polly-dce -polly-ast -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-dce -polly-ast -analyze < %s | FileCheck %s ; ; void f(int *A) { ; for (int i = 0; i < 1024; i++) @@ -6,13 +6,7 @@ ; for (int i = 0; i < 1024; i++) ; S2: A[i2] = i; ; } - -; We unfortunately do need to execute all iterations of S1, as we do not know -; the size of A and as a result S1 may write for example to A[1024], which -; is not overwritten by S2. - -; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) -; CHECK: Stmt_S1(c1); +; ; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) ; CHECK: Stmt_S2(c1); Index: test/ScopDetect/non_affine_but_modulo_access.ll =================================================================== --- test/ScopDetect/non_affine_but_modulo_access.ll +++ test/ScopDetect/non_affine_but_modulo_access.ll @@ -1,18 +1,15 @@ -; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-detect -analyze < %s | FileCheck %s ; -; FIXME: We cannot detect this SCoP yet but as soon as we can we should check -; that the reduction is detected! +; CHECK: Valid Region for Scop: ; -; CHECK-NOT: Scattering -; -; void f(int *A) { +; void jd(int *A) { ; for (int i = 0; i < 1024; i++) -; A[i % 2] += i; +; A[i % 2] = A[i % 2 + 1]; ; } ; -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define void @f(i32* %A) { +define void @jd(i32* %A) { entry: br label %for.cond @@ -23,10 +20,14 @@ for.body: ; preds = %for.cond %rem = srem i32 %i.0, 2 - %arrayidx = getelementptr inbounds i32* %A, i32 %rem + %add = add nsw i32 %rem, 1 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom %tmp = load i32* %arrayidx, align 4 - %add = add nsw i32 %tmp, %i.0 - store i32 %add, i32* %arrayidx, align 4 + %rem1 = srem i32 %i.0, 2 + %idxprom2 = sext i32 %rem1 to i64 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2 + store i32 %tmp, i32* %arrayidx3, align 4 br label %for.inc for.inc: ; preds = %for.body Index: test/ScopDetect/non_affine_but_modulo_condition.ll =================================================================== --- /dev/null +++ test/ScopDetect/non_affine_but_modulo_condition.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-detect -analyze < %s | FileCheck %s +; +; TODO: The modulo support needs to be extended to conditions and loop bounds. +; +; CHECK-NOT: Valid Region for Scop: +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; if (i % 2) +; A[i] += 1; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp = trunc i64 %indvars.iv to i32 + %rem1 = and i32 %tmp, 1 + %tobool = icmp eq i32 %rem1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp2 = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp2, 1 + store i32 %add, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %for.body, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_alternating_base.ll =================================================================== --- test/ScopInfo/reduction_alternating_base.ll +++ test/ScopInfo/reduction_alternating_base.ll @@ -1,9 +1,9 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; -; FIXME: We cannot detect this SCoP yet but as soon as we can we should check -; that the reduction is detected! -; -; CHECK-NOT: Scattering +; CHECK: ReadAccess := [Reduction Type: +] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[o0] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 <= 1 and o0 >= 0) }; +; CHECK: MustWriteAccess := [Reduction Type: +] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[o0] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 <= 1 and o0 >= 0) }; ; ; void f(int *A) { ; for (int i = 0; i < 1024; i++)