Index: include/llvm/Analysis/AliasSetTracker.h =================================================================== --- include/llvm/Analysis/AliasSetTracker.h +++ include/llvm/Analysis/AliasSetTracker.h @@ -220,17 +220,7 @@ /// If this alias set is known to contain a single instruction and *only* a /// single unique instruction, return it. Otherwise, return nullptr. - Instruction* getUniqueInstruction() { - if (size() != 0) - // Can't track source of pointer, might be many instruction - return nullptr; - if (AliasAny) - // May have collapses alias set - return nullptr; - if (1 != UnknownInsts.size()) - return nullptr; - return cast(UnknownInsts[0]); - } + Instruction* getUniqueInstruction(); void print(raw_ostream &OS) const; void dump() const; Index: lib/Analysis/AliasSetTracker.cpp =================================================================== --- lib/Analysis/AliasSetTracker.cpp +++ lib/Analysis/AliasSetTracker.cpp @@ -252,6 +252,31 @@ return false; } +Instruction* AliasSet::getUniqueInstruction() { + if (AliasAny) + // May have collapses alias set + return nullptr; + if (begin() != end()) { + if (!UnknownInsts.empty()) + // Another instruction found + return nullptr; + if (std::next(begin()) != end()) + // Another instruction found + return nullptr; + Value *Addr = begin()->getValue(); + assert(!Addr->user_empty() && + "where's the instruction which added this pointer?"); + if (std::next(Addr->user_begin()) != Addr->user_end()) + // Another instruction found -- this is really restrictive + // TODO: generalize! + return nullptr; + return cast(*(Addr->user_begin())); + } + if (1 != UnknownInsts.size()) + return nullptr; + return cast(UnknownInsts[0]); +} + void AliasSetTracker::clear() { // Delete all the PointerRec entries. for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -421,7 +421,8 @@ // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && + !I.mayHaveSideEffects()) { if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { if (!FreeInLoop) { ++II; @@ -607,8 +608,8 @@ /// concerns such as aliasing or speculation safety. bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. - return (isa(I) || isa(I) || - isa(I) || + return (isa(I) || isa(I) || + isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || @@ -702,6 +703,7 @@ // it's arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { + // TODO: expand to writeable arguments for (Value *Op : CI->arg_operands()) if (Op->getType()->isPointerTy() && pointerInvalidatedByLoop( @@ -736,6 +738,24 @@ (void)FI; //suppress unused variable warning assert(UniqueI == FI && "AS must contain FI"); return true; + } else if (auto *SI = dyn_cast(&I)) { + if (!SI->isUnordered()) + return false; // Don't sink/hoist volatile or ordered atomic store! + + // We can only hoist a store that we can prove writes a value which is not + // read or overwritten within the loop. For those cases, we fallback to + // load store promotion instead. + auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); + + if (AS.isRef() || !AS.isMustAlias()) + // Quick exit test, handled by the full path below as well. + return false; + auto *UniqueI = AS.getUniqueInstruction(); + if (!UniqueI) + // other memory op, give up + return false; + assert(UniqueI == SI && "AS must contain SI"); + return true; } assert(!I.mayReadOrWriteMemory() && "unhandled aliasing"); Index: test/Transforms/LICM/atomics.ll =================================================================== --- test/Transforms/LICM/atomics.ll +++ test/Transforms/LICM/atomics.ll @@ -173,10 +173,11 @@ end: ret i32 %vala ; CHECK-LABEL: define i32 @test7b( +; CHECK-LABEL: entry: +; CHECK: store i32 5, i32* %x +; CHECK-LABEL: loop: ; CHECK: load atomic i32, i32* %y monotonic - ; CHECK-LABEL: end: -; CHECK: store i32 5, i32* %x ; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4 } Index: test/Transforms/LICM/funclet.ll =================================================================== --- test/Transforms/LICM/funclet.ll +++ test/Transforms/LICM/funclet.ll @@ -97,9 +97,11 @@ } ; CHECK-LABEL: define void @test3( -; CHECK: catchswitch within none +; CHECK-LABEL: forbody.preheader: ; CHECK: store i32 1, i32* %bc, align 4 ; CHECK: store i32 2, i32* %bc2, align 4 +; CHECK: catchswitch within none +; CHECK-LABEL: forbody: declare void @may_throw() Index: test/Transforms/LICM/promote-order.ll =================================================================== --- test/Transforms/LICM/promote-order.ll +++ test/Transforms/LICM/promote-order.ll @@ -10,6 +10,11 @@ @p = external global i8* define i32* @_Z4doiti(i32 %n, float* %tmp1, i32* %tmp3) nounwind { +; CHECK-LABEL: for.body.lr.ph: +; CHECK: store float 1.000000e+00, float* %tmp1 +; CHECK-LABEL: for.cond.for.end_crit_edge: +; CHECK: store i32 1, i32* %tmp3 + entry: %cmp1 = icmp slt i32 0, %n br i1 %cmp1, label %for.body.lr.ph, label %for.end @@ -25,9 +30,6 @@ %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge -; CHECK: for.cond.for.end_crit_edge: -; CHECK: store float 1.000000e+00, float* %tmp1 -; CHECK: store i32 1, i32* %tmp3 for.cond.for.end_crit_edge: ; preds = %for.body %split = phi i32* [ %tmp3, %for.body ] br label %for.end Index: test/Transforms/LICM/store-hoisting.ll =================================================================== --- test/Transforms/LICM/store-hoisting.ll +++ test/Transforms/LICM/store-hoisting.ll @@ -3,8 +3,9 @@ define void @test(i32* %loc) { ; CHECK-LABEL: @test -; CHECK-LABEL: exit: +; CHECK-LABEL: entry: ; CHECK: store i32 0, i32* %loc +; CHECK-LABEL: loop: entry: br label %loop @@ -21,10 +22,9 @@ define void @test_multiexit(i32* %loc, i1 %earlycnd) { ; CHECK-LABEL: @test_multiexit -; CHECK-LABEL: exit1: -; CHECK: store i32 0, i32* %loc -; CHECK-LABEL: exit2: +; CHECK-LABEL: entry: ; CHECK: store i32 0, i32* %loc +; CHECK-LABEL: loop: entry: br label %loop @@ -44,6 +44,24 @@ ret void } +define i32* @false_negative_2use(i32* %loc) { +; CHECK-LABEL: @false_negative_2use +; CHECK-LABEL: exit: +; CHECK: store i32 0, i32* %loc +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + store i32 0, i32* %loc + %iv.next = add i32 %iv, 1 + %cmp = icmp slt i32 %iv, 200 + br i1 %cmp, label %loop, label %exit + +exit: + ret i32* %loc +} + define void @neg_lv_value(i32* %loc) { ; CHECK-LABEL: @neg_lv_value ; CHECK-LABEL: exit: