Index: polly/trunk/include/polly/CodeGen/BlockGenerators.h =================================================================== --- polly/trunk/include/polly/CodeGen/BlockGenerators.h +++ polly/trunk/include/polly/CodeGen/BlockGenerators.h @@ -535,22 +535,6 @@ void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); - /// @brief Helper to get the newest version of @p ScalarValue. - /// - /// @param ScalarValue The original value needed. - /// @param R The current SCoP region. - /// @param Stmt The ScopStmt in which we look up this value. - /// @param LTS A mapping from loops virtual canonical induction - /// variable to their new values - /// (for values recalculated in the new ScoP, but not - /// within this basic block) - /// @param BBMap A mapping from old values to their new values - /// (for values recalculated within this basic block). - /// - /// @returns The newest version (e.g., reloaded) of the scalar value. - Value *getNewScalarValue(Value *ScalarValue, const Region &R, ScopStmt &, - LoopToScevMapT <S, ValueMapT &BBMap); - /// @brief Helper to determine if @p Inst can be synthezised in @p Stmt. /// /// @returns false, iff @p Inst can be synthesized in @p Stmt. Index: polly/trunk/lib/CodeGen/BlockGenerators.cpp =================================================================== --- polly/trunk/lib/CodeGen/BlockGenerators.cpp +++ polly/trunk/lib/CodeGen/BlockGenerators.cpp @@ -410,56 +410,9 @@ } } -Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, - ScopStmt &Stmt, LoopToScevMapT <S, - ValueMapT &BBMap) { - // If the value we want to store is an instruction we might have demoted it - // in order to make it accessible here. In such a case a reload is - // necessary. If it is no instruction it will always be a value that - // dominates the current point and we can just use it. In total there are 4 - // options: - // (1) The value is no instruction ==> use the value. - // (2) The value is an instruction that was split out of the region prior to - // code generation ==> use the instruction as it dominates the region. - // (3) The value is an instruction: - // (a) The value was defined in the current block, thus a copy is in - // the BBMap ==> use the mapped value. - // (b) The value was defined in a previous block, thus we demoted it - // earlier ==> use the reloaded value. - Instruction *ScalarValueInst = dyn_cast(ScalarValue); - if (!ScalarValueInst) - return ScalarValue; - - if (!R.contains(ScalarValueInst)) { - if (Value *ScalarValueCopy = GlobalMap.lookup(ScalarValueInst)) - return /* Case (3a) */ ScalarValueCopy; - else - return /* Case 2 */ ScalarValue; - } - - if (Value *ScalarValueCopy = BBMap.lookup(ScalarValueInst)) - return /* Case (3a) */ ScalarValueCopy; - - if ((Stmt.isBlockStmt() && - Stmt.getBasicBlock() == ScalarValueInst->getParent()) || - (Stmt.isRegionStmt() && Stmt.getRegion()->contains(ScalarValueInst))) { - auto SynthesizedValue = trySynthesizeNewValue( - Stmt, ScalarValueInst, BBMap, LTS, getLoopForInst(ScalarValueInst)); - - if (SynthesizedValue) - return SynthesizedValue; - } - - // Case (3b) - Value *Address = getOrCreateScalarAlloca(ScalarValueInst); - ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload"); - - return ScalarValue; -} - void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { - const Region &R = Stmt.getParent()->getRegion(); + Loop *L = LI.getLoopFor(Stmt.getBasicBlock()); assert(Stmt.isBlockStmt() && "Region statements need to use the " "generateScalarStores() function in the " @@ -472,7 +425,7 @@ Value *Val = MA->getAccessValue(); auto *Address = getOrCreateAlloca(*MA); - Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); + Val = getNewValue(Stmt, Val, BBMap, LTS, L); Builder.CreateStore(Val, Address); } } @@ -1228,12 +1181,13 @@ void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { - const Region &R = Stmt.getParent()->getRegion(); - assert(Stmt.getRegion() && "Block statements need to use the generateScalarStores() " "function in the BlockGenerator"); + // TODO: Add some test cases that ensure this is really the right choice. + Loop *L = LI.getLoopFor(Stmt.getRegion()->getExit()); + for (MemoryAccess *MA : Stmt) { if (MA->isArrayKind() || MA->isRead()) continue; @@ -1260,7 +1214,7 @@ auto Address = getOrCreateAlloca(*MA); - Val = getNewScalarValue(Val, R, Stmt, LTS, *LocalBBMap); + Val = getNewValue(Stmt, Val, *LocalBBMap, LTS, L); Builder.CreateStore(Val, Address); // Restore the insertion point if necessary. Index: polly/trunk/test/Isl/CodeGen/invariant_load_scalar_escape_alloca_sharing.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/invariant_load_scalar_escape_alloca_sharing.ll +++ polly/trunk/test/Isl/CodeGen/invariant_load_scalar_escape_alloca_sharing.ll @@ -1,6 +1,14 @@ ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s ; -; Verify the preloaded %0 is stored and communicated in the same alloca. +; Verify the preloaded %tmp0 is stored and communicated in the same alloca. +; In this case, we do not reload %ncol.load from the scalar stack slot, but +; instead use directly the preloaded value stored in GlobalMap. +; +; TODO: We may want to not add preloaded values to GlobalMap, but instead model +; them as normal read/write memory accesses. This will allow us to +; easily reason about the use of preloaded data in scop statements. +; At the moment, we would need to scan the IR to understand if a stmt +; uses any preloaded values. ; ; CHECK-NOT: alloca ; CHECK: %dec3.s2a = alloca i32 @@ -15,7 +23,7 @@ ; ; CHECK: polly.stmt.while.body.lr.ph: ; CHECK-NEXT: %tmp0.preload.s2a.reload = load i32, i32* %tmp0.preload.s2a -; CHECK-NEXT: store i32 %tmp0.preload.s2a.reload, i32* %dec3.in.phiops +; CHECK-NEXT: store i32 %ncol.load, i32* %dec3.in.phiops ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/phi-defined-before-scop.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/phi-defined-before-scop.ll +++ polly/trunk/test/Isl/CodeGen/phi-defined-before-scop.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: polly.stmt.bb3: ; CHECK-NEXT: %tmp2.s2a.reload = load %struct.wibble*, %struct.wibble** %tmp2.s2a -; CHECK-NEXT: store %struct.wibble* %tmp2, %struct.wibble** %tmp7.s2a +; CHECK-NEXT: store %struct.wibble* %tmp2.s2a.reload, %struct.wibble** %tmp7.s2a target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/synthesizable_phi_write_after_loop.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/synthesizable_phi_write_after_loop.ll +++ polly/trunk/test/Isl/CodeGen/synthesizable_phi_write_after_loop.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; Check for the correct written value of a scalar phi write whose value is +; defined within the loop, but its effective value is its last definition when +; leaving the loop (in this test it is the value 2 for %i.inc). This can be +; either computed: +; - Using SCEVExpander: +; In this case the Loop passed to the expander must NOT be the loop +; - Overwriting the same alloca in each iteration s.t. the last value will +; retain in %i.inc.s2a +; The latter is currently generated by Polly and tested here. + +; CHECK: polly.stmt.next: +; CHECK-NEXT: %i.inc.s2a.reload = load i32, i32* %i.inc.s2a +; CHECK-NEXT: store i32 %i.inc.s2a.reload, i32* %phi.phiops +; CHECK-NEXT: br label %polly.stmt.join +; +; CHECK: polly.stmt.loop: +; CHECK: %0 = trunc i64 %polly.indvar to i32 +; CHECK: %1 = add i32 %0, 1 +; CHECK: store i32 %1, i32* %i.inc.s2a + +define i32 @func() { +entry: + br label %start + +start: + br i1 true, label %loop, label %join + +loop: + %i = phi i32 [ 0, %start ], [ %i.inc, %loop ] + %i.inc = add nsw i32 %i, 1 + %cond = icmp slt i32 %i.inc, 2 + br i1 %cond, label %loop, label %next + +next: + br label %join + +join: + %phi = phi i32 [%i.inc, %next], [0, %start] + br label %return + +return: + ret i32 %phi +}