Index: include/llvm/Analysis/AliasSetTracker.h =================================================================== --- include/llvm/Analysis/AliasSetTracker.h +++ include/llvm/Analysis/AliasSetTracker.h @@ -221,12 +221,25 @@ /// If this alias set is known to contain a single instruction and *only* a /// single unique instruction, return it. Otherwise, return nullptr. Instruction* getUniqueInstruction() { - if (size() != 0) - // Can't track source of pointer, might be many instruction - return nullptr; if (AliasAny) // May have collapses alias set return nullptr; + if (begin() != end()) { + if (!UnknownInsts.empty()) + // Another instruction found + return nullptr;; + if (std::next(begin()) != end()) + // Another instruction found + return nullptr; + Value *Addr = begin()->getValue(); + assert(!Addr->user_empty() && + "where's the instruction which added this pointer?"); + if (std::next(Addr->user_begin()) != Addr->user_end()) + // Another instruction found -- this is really restrictive + // TODO: generalize! + return nullptr; + return cast(*(Addr->user_begin())); + } if (1 != UnknownInsts.size()) return nullptr; return cast(UnknownInsts[0]); Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -421,7 +421,8 @@ // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && + !I.mayHaveSideEffects()) { if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { if (!FreeInLoop) { ++II; @@ -607,8 +608,8 @@ /// concerns such as aliasing or speculation safety. bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. - return (isa(I) || isa(I) || - isa(I) || + return (isa(I) || isa(I) || + isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || @@ -702,6 +703,7 @@ // it's arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { + // TODO: expand to writeable arguments for (Value *Op : CI->arg_operands()) if (Op->getType()->isPointerTy() && pointerInvalidatedByLoop( @@ -716,7 +718,19 @@ if (isReadOnly(CurAST)) return true; } - + if (AliasAnalysis::doesNotReadMemory(Behavior)) { + auto Begin = CurAST->begin(); + assert(Begin != CurAST->end() && "must contain FI"); + if (std::next(Begin) != CurAST->end()) + // constant memory for instance, TODO: handle better + return false; + auto *UniqueI = Begin->getUniqueInstruction(); + if (!UniqueI) + // other memory op, give up + return false; + assert(UniqueI == CI && "AS must contain FI"); + return true; + } // FIXME: This should use mod/ref information to see if we can hoist or // sink the call. @@ -736,6 +750,24 @@ (void)FI; //suppress unused variable warning assert(UniqueI == FI && "AS must contain FI"); return true; + } else if (auto *SI = dyn_cast(&I)) { + if (!SI->isUnordered()) + return false; // Don't sink/hoist volatile or ordered atomic store! + + // We can only hoist a store that we can prove writes a value which is not + // read or overwritten within the loop. For those cases, we fallback to + // load store promotion instead. + auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); + + if (AS.isRef() || !AS.isMustAlias()) + // Quick exit test, handled by the full path below as well. + return false; + auto *UniqueI = AS.getUniqueInstruction(); + if (!UniqueI) + // other memory op, give up + return false; + assert(UniqueI == SI && "AS must contain SI"); + return true; } assert(!I.mayReadOrWriteMemory() && "unhandled aliasing"); Index: test/Transforms/LICM/atomics.ll =================================================================== --- test/Transforms/LICM/atomics.ll +++ test/Transforms/LICM/atomics.ll @@ -173,10 +173,11 @@ end: ret i32 %vala ; CHECK-LABEL: define i32 @test7b( +; CHECK-LABEL: entry: +; CHECK: store i32 5, i32* %x +; CHECK-LABEL: loop: ; CHECK: load atomic i32, i32* %y monotonic - ; CHECK-LABEL: end: -; CHECK: store i32 5, i32* %x ; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4 } Index: test/Transforms/LICM/call-hoisting.ll =================================================================== --- test/Transforms/LICM/call-hoisting.ll +++ test/Transforms/LICM/call-hoisting.ll @@ -28,9 +28,9 @@ define void @test(i32* %loc) { ; CHECK-LABEL: @test -; CHECK-LABEL: loop: +; CHECK-LABEL: entry: ; CHECK: call void @store -; CHECK-LABEL: exit: +; CHECK-LABEL: loop: entry: br label %loop @@ -47,9 +47,9 @@ define void @test_multiexit(i32* %loc, i1 %earlycnd) { ; CHECK-LABEL: @test_multiexit -; CHECK-LABEL: loop: +; CHECK-LABEL: entry: ; CHECK: call void @store -; CHECK-LABEL: backedge: +; CHECK-LABEL: loop: entry: br label %loop @@ -219,6 +219,25 @@ ret void } +define void @test_writeonly_loop(i32* %loc) { +; CHECK-LABEL: @test_writeonly_loop +; CHECK-LABEL: entry: +; CHECK: call void @not_argmemonly +; CHECK-LABEL: loop: +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + call void @not_argmemonly(i32 0, i32* %loc) + %iv.next = add i32 %iv, 1 + %cmp = icmp slt i32 %iv, 200 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + define void @neg_not_argmemonly(i32* %loc) { ; CHECK-LABEL: @neg_not_argmemonly ; CHECK-LABEL: loop: @@ -230,6 +249,7 @@ loop: %iv = phi i32 [0, %entry], [%iv.next, %loop] call void @not_argmemonly(i32 0, i32* %loc) + load volatile i32, i32* %loc %iv.next = add i32 %iv, 1 %cmp = icmp slt i32 %iv, 200 br i1 %cmp, label %loop, label %exit Index: test/Transforms/LICM/funclet.ll =================================================================== --- test/Transforms/LICM/funclet.ll +++ test/Transforms/LICM/funclet.ll @@ -97,9 +97,11 @@ } ; CHECK-LABEL: define void @test3( -; CHECK: catchswitch within none +; CHECK-LABEL: forbody.preheader: ; CHECK: store i32 1, i32* %bc, align 4 ; CHECK: store i32 2, i32* %bc2, align 4 +; CHECK: catchswitch within none +; CHECK-LABEL: forbody: declare void @may_throw() Index: test/Transforms/LICM/promote-order.ll =================================================================== --- test/Transforms/LICM/promote-order.ll +++ test/Transforms/LICM/promote-order.ll @@ -10,6 +10,11 @@ @p = external global i8* define i32* @_Z4doiti(i32 %n, float* %tmp1, i32* %tmp3) nounwind { +; CHECK-LABEL: for.body.lr.ph: +; CHECK: store float 1.000000e+00, float* %tmp1 +; CHECK-LABEL: for.cond.for.end_crit_edge: +; CHECK: store i32 1, i32* %tmp3 + entry: %cmp1 = icmp slt i32 0, %n br i1 %cmp1, label %for.body.lr.ph, label %for.end @@ -25,9 +30,6 @@ %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge -; CHECK: for.cond.for.end_crit_edge: -; CHECK: store float 1.000000e+00, float* %tmp1 -; CHECK: store i32 1, i32* %tmp3 for.cond.for.end_crit_edge: ; preds = %for.body %split = phi i32* [ %tmp3, %for.body ] br label %for.end Index: test/Transforms/LICM/store-hoisting.ll =================================================================== --- test/Transforms/LICM/store-hoisting.ll +++ test/Transforms/LICM/store-hoisting.ll @@ -3,8 +3,9 @@ define void @test(i32* %loc) { ; CHECK-LABEL: @test -; CHECK-LABEL: exit: +; CHECK-LABEL: entry: ; CHECK: store i32 0, i32* %loc +; CHECK-LABEL: loop: entry: br label %loop @@ -21,10 +22,9 @@ define void @test_multiexit(i32* %loc, i1 %earlycnd) { ; CHECK-LABEL: @test_multiexit -; CHECK-LABEL: exit1: -; CHECK: store i32 0, i32* %loc -; CHECK-LABEL: exit2: +; CHECK-LABEL: entry: ; CHECK: store i32 0, i32* %loc +; CHECK-LABEL: loop: entry: br label %loop @@ -44,6 +44,24 @@ ret void } +define i32* @false_negative_2use(i32* %loc) { +; CHECK-LABEL: @false_negative_2use +; CHECK-LABEL: exit: +; CHECK: store i32 0, i32* %loc +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + store i32 0, i32* %loc + %iv.next = add i32 %iv, 1 + %cmp = icmp slt i32 %iv, 200 + br i1 %cmp, label %loop, label %exit + +exit: + ret i32* %loc +} + define void @neg_lv_value(i32* %loc) { ; CHECK-LABEL: @neg_lv_value ; CHECK-LABEL: exit: