Index: polly/trunk/include/polly/CodeGen/BlockGenerators.h =================================================================== --- polly/trunk/include/polly/CodeGen/BlockGenerators.h +++ polly/trunk/include/polly/CodeGen/BlockGenerators.h @@ -834,6 +834,77 @@ /// Add the new operand from the copy of @p IncomingBB to @p PHICopy. /// + /// PHI nodes, which may have (multiple) edges that enter from outside the + /// non-affine subregion and even from outside the scop, are code generated as + /// follows: + /// + /// # Original + /// + /// Region: %A -> %exit + /// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC) + /// + /// pre: + /// %val = add i64 1, 1 + /// + /// A: + /// br label %nonaffB + /// + /// nonaffB: + /// %phi = phi i64 [%val, %A], [%valC, %nonaffC], [%valD, %D] + /// %cmp = ... + /// br i1 %cmp, label %D, label %nonaffC + /// + /// nonaffC: + /// %valC = add i64 1, 1 + /// br i1 undef, label %D, label %nonaffB + /// + /// D: + /// %valD = ... + /// %exit_cond = ... + /// br i1 %exit_cond, label %nonaffB, label %exit + /// + /// exit: + /// ... + /// + /// - %A and %D enter from outside the non-affine region. + /// - %nonaffC enters from within the non-affine region. + /// + /// # New + /// + /// polly.A: + /// store i64 %val, i64* %phi.phiops + /// br label %polly.nonaffB.entry + /// + /// polly.nonaffB.entry: + /// %phi.phiops.reload = load i64, i64* %phi.phiops + /// br label %polly.nonaffB + /// + /// polly.nonaffB: + /// %polly.phi = phi i64 [%phi.phiops.reload, %polly.nonaffB.entry], + /// [%p.valC, %polly.nonaffC] + /// + /// polly.nonaffC: + /// %p.valC = add i64 1, 1 + /// br i1 undef, label %polly.D, label %polly.nonaffB + /// + /// polly.D: + /// %p.valD = ...
+ /// store i64 %p.valD, i64* %phi.phiops + /// %p.exit_cond = ... + /// br i1 %p.exit_cond, label %polly.nonaffB, label %exit + /// + /// Values that enter the PHI from outside the non-affine region are stored + /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and + /// reloaded in %polly.nonaffB.entry, a basic block generated before the + /// actual non-affine region. + /// + /// When generating the PHI node of the non-affine region in %polly.nonaffB, + /// incoming edges from outside the region are combined into a single edge + /// from %polly.nonaffB.entry, whose incoming value is the value reloaded + /// from the %phi.phiops stack slot. Incoming edges from within the region + /// refer to the copied instructions (%p.valC) and basic blocks + /// (%polly.nonaffC) of the non-affine region. + /// /// @param Stmt The statement to code generate. /// @param PHI The original PHI we copy. /// @param PHICopy The copy of @p PHI. Index: polly/trunk/lib/CodeGen/BlockGenerators.cpp =================================================================== --- polly/trunk/lib/CodeGen/BlockGenerators.cpp +++ polly/trunk/lib/CodeGen/BlockGenerators.cpp @@ -1415,12 +1415,12 @@ return; } + assert(RegionMaps.count(BBCopy) && "Incoming PHI block did not have a BBMap"); + ValueMapT &BBCopyMap = RegionMaps[BBCopy]; + Value *OpCopy = nullptr; - if (StmtR->contains(IncomingBB)) { - assert(RegionMaps.count(BBCopy) && - "Incoming PHI block did not have a BBMap"); - ValueMapT &BBCopyMap = RegionMaps[BBCopy]; + if (StmtR->contains(IncomingBB)) { Value *Op = PHI->getIncomingValueForBlock(IncomingBB); // If the current insert block is different from the PHIs incoming block @@ -1432,13 +1432,15 @@ if (IP->getParent() != BBCopy) Builder.SetInsertPoint(&*IP); } else { - + // All edges from outside the non-affine region become a single edge + // in the new copy of the non-affine region.
Make sure to only add the + // corresponding edge the first time we encounter a basic block from + // outside the non-affine region. if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) return; - Value *PHIOpAddr = getOrCreatePHIAlloca(PHI); - OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload", - BlockMap[IncomingBB]->getTerminator()); + // Get the reloaded value. + OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt)); } assert(OpCopy && "Incoming PHI value was not copied properly"); Index: polly/trunk/test/Isl/CodeGen/loop_partially_in_scop.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/loop_partially_in_scop.ll +++ polly/trunk/test/Isl/CodeGen/loop_partially_in_scop.ll @@ -12,11 +12,10 @@ ; CHECK: polly.stmt.bb2.entry: ; preds = %polly.start ; CHECK-NEXT: %tmp.phiops.reload = load i32, i32* %tmp.phiops -; CHECK-NEXT: %tmp.phiops.reload2 = load i32, i32* %tmp.phiops ; CHECK-NEXT: br label %polly.stmt.bb2 ; CHECK: polly.stmt.bb2: ; preds = %polly.stmt.bb2, %polly.stmt.bb2.entry -; CHECK-NEXT: %polly.tmp = phi i32 [ %tmp.phiops.reload2, %polly.stmt.bb2.entry ], [ %p_tmp4, %polly.stmt.bb2 ] +; CHECK-NEXT: %polly.tmp = phi i32 [ %tmp.phiops.reload, %polly.stmt.bb2.entry ], [ %p_tmp4, %polly.stmt.bb2 ] ; CHECK-NEXT: %p_tmp3 = or i32 undef, undef ; CHECK-NEXT: %p_tmp4 = udiv i32 %p_tmp3, 10 ; CHECK-NEXT: %p_tmp6 = icmp eq i8 undef, 0 Index: polly/trunk/test/ScopInfo/out-of-scop-use-in-region-entry-phi-node-nonaffine-subregion.ll =================================================================== --- polly/trunk/test/ScopInfo/out-of-scop-use-in-region-entry-phi-node-nonaffine-subregion.ll +++ polly/trunk/test/ScopInfo/out-of-scop-use-in-region-entry-phi-node-nonaffine-subregion.ll @@ -16,7 +16,7 @@ ; CHECK: %loop_carried.phiops.reload = load float, float* %loop_carried.phiops ; ; CHECK-LABEL: polly.stmt.subregion_entry: -; CHECK: %polly.loop_carried = phi float [ 
%loop_carried.phiops.reload2, %polly.stmt.subregion_entry.entry ] +; CHECK: %polly.loop_carried = phi float [ %loop_carried.phiops.reload, %polly.stmt.subregion_entry.entry ] ; CHECK: %p_newval = fadd float %polly.loop_carried, 1.000000e+00 ; ; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: