Index: polly/trunk/lib/Transform/ForwardOpTree.cpp =================================================================== --- polly/trunk/lib/Transform/ForwardOpTree.cpp +++ polly/trunk/lib/Transform/ForwardOpTree.cpp @@ -140,13 +140,6 @@ ForwardingDecision canForwardTree(ScopStmt *TargetStmt, Value *UseVal, ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) { - - // PHis are not yet supported. - if (isa(UseVal)) { - DEBUG(dbgs() << " Cannot forward PHI: " << *UseVal << "\n"); - return FD_CannotForward; - } - VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); switch (VUse.getKind()) { case VirtualUse::Constant: @@ -157,10 +150,31 @@ return FD_DidForward; return FD_CanForwardLeaf; - case VirtualUse::Synthesizable: - // Not supported yet. - DEBUG(dbgs() << " Cannot forward synthesizable: " << *UseVal << "\n"); + case VirtualUse::Synthesizable: { + // ScopExpander will take care for of generating the code at the new + // location. + if (DoIt) + return FD_DidForward; + + // Check if the value is synthesizable at the new location as well. This + // might be possible when leaving a loop for which ScalarEvolution is + // unable to derive the exit value for. + // TODO: If there is a LCSSA PHI at the loop exit, use that one. + // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we + // do not forward past its loop header. This would require us to use a + // previous loop induction variable instead the current one. We currently + // do not allow forwarding PHI nodes, thus this should never occur (the + // only exception where no phi is necessary being an unreachable loop + // without edge from the outside). + VirtualUse TargetUse = VirtualUse::create( + S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); + if (TargetUse.getKind() == VirtualUse::Synthesizable) + return FD_CanForwardLeaf; + + DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: " + << *UseVal << "\n"); return FD_CannotForward; + } case VirtualUse::ReadOnly: // Note that we cannot return FD_CanForwardTree here. With a operand tree @@ -185,6 +199,12 @@ case VirtualUse::Inter: auto Inst = cast(UseVal); + // PHIs, unless synthesizable, are not yet supported. + if (isa(Inst)) { + DEBUG(dbgs() << " Cannot forward PHI: " << *UseVal << "\n"); + return FD_CannotForward; + } + // Compatible instructions must satisfy the following conditions: // 1. Idempotent (instruction will be copied, not moved; although its // original instance might be removed by simplification) Index: polly/trunk/test/ForwardOpTree/forward_synthesizable_definloop.ll =================================================================== --- polly/trunk/test/ForwardOpTree/forward_synthesizable_definloop.ll +++ polly/trunk/test/ForwardOpTree/forward_synthesizable_definloop.ll @@ -0,0 +1,80 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Copy %val to bodyB, assuming the exit value of %i. +; +; for (int j = 0; j < n; j += 1) { +; double val; +; for (int i = 0; i < 128; i += 1) { +; bodyA: +; val = j; +; } +; +; bodyB: +; A[0] = val; +; } +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %inner.for, label %exit + + inner.for: + %i = phi i32 [0, %for], [%i.inc, %inner.inc] + br label %bodyA + + + bodyA: + %val = sitofp i32 %i to double + br label %inner.inc + + + inner.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, 128 + br i1 %i.cmp, label %inner.for, label %inner.exit + + inner.exit: + br label %bodyB + + + bodyB: + store double %val, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Instructions copied: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK: After statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0, i1] -> MemRef_val[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = sitofp i32 %i to double +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyB +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = sitofp i32 %i to double +; CHECK-NEXT: store double %val, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ForwardOpTree/forward_synthesizable_indvar.ll =================================================================== --- polly/trunk/test/ForwardOpTree/forward_synthesizable_indvar.ll +++ polly/trunk/test/ForwardOpTree/forward_synthesizable_indvar.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Test support for (synthesizable) inducation variables. +; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double val = j; +; +; bodyB: +; A[0] = val; +; } +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %val = sitofp i32 %j to double + br label %bodyB + + bodyB: + store double %val, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Instructions copied: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK: After statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = sitofp i32 %j to double +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyB +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = sitofp i32 %j to double +; CHECK-NEXT: store double %val, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ForwardOpTree/forward_synthesizable_useinloop.ll =================================================================== --- polly/trunk/test/ForwardOpTree/forward_synthesizable_useinloop.ll +++ polly/trunk/test/ForwardOpTree/forward_synthesizable_useinloop.ll @@ -0,0 +1,80 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Synthesizable values defined outside of a loop can be used +; inside the loop. +; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double val = j; +; +; for (int i = 0; i < n; i += 1) { +; bodyB: +; A[0] = val; +; } +; } +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %val = sitofp i32 %j to double + br label %inner.for + + + inner.for: + %i = phi i32 [0, %bodyA], [%i.inc, %inner.inc] + %i.cmp = icmp slt i32 %i, %n + br i1 %i.cmp, label %bodyB, label %inner.exit + + + bodyB: + store double %val, double* %A + br label %inner.inc + + + inner.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %inner.for + + inner.exit: + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Instructions copied: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK: After statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = sitofp i32 %j to double +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyB +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0, i1] -> MemRef_A[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = sitofp i32 %j to double +; CHECK-NEXT: store double %val, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ForwardOpTree/noforward_synthesizable.ll =================================================================== --- polly/trunk/test/ForwardOpTree/noforward_synthesizable.ll +++ polly/trunk/test/ForwardOpTree/noforward_synthesizable.ll @@ -1,43 +0,0 @@ -; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines -; -; %val has a synthesizable argument that we currently do not support. -; -; for (int j = 0; j < n; j += 1) { -; bodyA: -; double v = j; -; double val = 21.0 + 21.0; -; -; bodyB: -; A[0] = val; -; } -; -define void @func(i32 %n, double* noalias nonnull %A) { -entry: - br label %for - -for: - %j = phi i32 [0, %entry], [%j.inc, %inc] - %j.cmp = icmp slt i32 %j, %n - br i1 %j.cmp, label %bodyA, label %exit - - bodyA: - %val = sitofp i32 %j to double - br label %bodyB - - bodyB: - store double %val, double* %A - br label %inc - -inc: - %j.inc = add nuw nsw i32 %j, 1 - br label %for - -exit: - br label %return - -return: - ret void -} - - -; CHECK: ForwardOpTree executed, but did not modify anything Index: polly/trunk/test/ForwardOpTree/noforward_synthesizable_unknownit.ll =================================================================== --- polly/trunk/test/ForwardOpTree/noforward_synthesizable_unknownit.ll +++ polly/trunk/test/ForwardOpTree/noforward_synthesizable_unknownit.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Do not try to forward %i.trunc, it is not synthesizable in %body. +; +define void @func(i32 %n, i32* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + %zero = sext i32 0 to i64 + br i1 %j.cmp, label %inner.for, label %exit + + + ; This loop has some unusual properties: + ; * It has a known iteration count (8), therefore SCoP-compatible. + ; * %i.trunc is synthesizable within the loop ({1,+,1}<%while.body>). + ; * %i.trunc is not synthesizable outside of the loop, because its value is + ; unknown when exiting. + ; (should be 8, but ScalarEvolution currently seems unable to derive that) + ; + ; ScalarEvolution currently seems to not able to handle the %zero. + ; If it becomes more intelligent, there might be other such loop constructs. + inner.for: + %i = phi i64 [%zero, %for], [%i.inc, %inner.for] + %i.inc = add nuw nsw i64 %i, 1 + %i.trunc = trunc i64 %i.inc to i32 + %i.and = and i32 %i.trunc, 7 + %inner.cond = icmp eq i32 %i.and, 0 + br i1 %inner.cond, label %body, label %inner.for + + body: + store i32 %i.trunc, i32* %A + br label %inc + + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: ForwardOpTree executed, but did not modify anything