Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -250,27 +250,47 @@
   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
-  if (!DisablePromotion && (Preheader || L->hasDedicatedExits())) {
+  // Don't sink stores from loops without dedicated block exits. Exits
+  // containing indirect branches are not transformed by loop simplify,
+  // so make sure we catch that. An additional load may be generated in the
+  // preheader for the SSA updater, so also avoid sinking when no preheader
+  // is available.
+  if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+    // Figure out the loop exits and their insertion points.
     SmallVector<BasicBlock *, 8> ExitBlocks;
-    SmallVector<Instruction *, 8> InsertPts;
-    PredIteratorCache PIC;
+    L->getUniqueExitBlocks(ExitBlocks);
 
-    // Loop over all of the alias sets in the tracker object.
-    for (AliasSet &AS : *CurAST)
-      Changed |= promoteLoopAccessesToScalars(
-          AS, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, CurAST, &SafetyInfo);
-
-    // Once we have promoted values across the loop body we have to recursively
-    // reform LCSSA as any nested loop may now have values defined within the
-    // loop used in the outer loop.
-    // FIXME: This is really heavy handed. It would be a bit better to use an
-    // SSAUpdater strategy during promotion that was LCSSA aware and reformed
-    // it as it went.
-    if (Changed) {
-      formLCSSARecursively(*L, *DT, LI, SE);
+    // We can't insert into a catchswitch.
+    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
+      return isa<CatchSwitchInst>(Exit->getTerminator());
+    });
+
+    if (!HasCatchSwitch) {
+      SmallVector<Instruction *, 8> InsertPts;
+      InsertPts.reserve(ExitBlocks.size());
+      for (BasicBlock *ExitBlock : ExitBlocks)
+        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+
+      PredIteratorCache PIC;
+
+      // Loop over all of the alias sets in the tracker object.
+      for (AliasSet &AS : *CurAST)
+        Changed |=
+            promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT,
+                                         TLI, L, CurAST, &SafetyInfo);
     }
   }
+  // Once we have promoted values across the loop body we have to recursively
+  // reform LCSSA as any nested loop may now have values defined within the
+  // loop used in the outer loop.
+  // FIXME: This is really heavy handed. It would be a bit better to use an
+  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
+  // it as it went.
+  if (Changed)
+    formLCSSARecursively(*L, *DT, LI, SE);
+
   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
   // specifically moving instructions across the loop boundary and so it is
   // especially in need of sanity checking here.
@@ -887,17 +907,23 @@
   // path which did not originally have one.
   //
   // It is safe to promote P if all uses are direct load/stores and if at
-  // least one is guaranteed to be executed.
-  bool GuaranteedToExecute = false;
+  // least one store is guaranteed to be executed.
+  bool PromotionIsLegal = false;
 
   // It is also safe to promote P if we can prove that speculating a load into
   // the preheader is safe (i.e. proving dereferenceability on all
-  // paths through the loop), and that the memory can be proven thread local
-  // (so that the memory model requirement doesn't apply.) We first establish
-  // the former, and then run a capture analysis below to establish the later.
-  // We can use any access within the alias set to prove dereferenceability
-  // since they're all must alias.
+  // paths through the loop), and if we are sure we don't introduce memory
+  // model issues by sinking the store.
+  // A store is safe to sink if:
+  // a) We can prove the memory it stores to is thread local, in which case
+  //    the memory model requirement does not apply.
+  // b) It was already guaranteed to execute at least once (see above).
+  // c) It was not guaranteed to execute - but only because we may exit the loop
+  //    through an exception. This guarantees that dynamic executions which would
+  //    not execute the original store will not execute the exit block, so no
+  //    new store is introduced.
   bool CanSpeculateLoad = false;
+  bool StoreDominatesExits = false;
 
   SmallVector<Instruction *, 64> LoopUses;
   SmallPtrSet<Value *, 4> PointerMustAliases;
@@ -906,15 +932,9 @@
   // us to prove better alignment.
   unsigned Alignment = 1;
   AAMDNodes AATags;
-  bool HasDedicatedExits = CurLoop->hasDedicatedExits();
 
-  // Don't sink stores from loops without dedicated block exits. Exits
-  // containing indirect branches are not transformed by loop simplify,
-  // make sure we catch that. An additional load may be generated in the
-  // preheader for SSA updater, so also avoid sinking when no preheader
-  // is available.
-  if (!HasDedicatedExits || !Preheader)
-    return false;
+  assert(CurLoop->hasDedicatedExits() && Preheader &&
+         "Expected preheader and dedicated loop exits.");
 
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
 
@@ -955,7 +975,7 @@
         if (!Load->isSimple())
           return Changed;
 
-        if (!GuaranteedToExecute && !CanSpeculateLoad)
+        if (!PromotionIsLegal && !CanSpeculateLoad)
           CanSpeculateLoad = isSafeToExecuteUnconditionally(
               *Load, DT, CurLoop, SafetyInfo, Preheader->getTerminator());
       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
@@ -967,10 +987,6 @@
         if (!Store->isSimple())
           return Changed;
 
-        // Note that we only check GuaranteedToExecute inside the store case
-        // so that we do not introduce stores where they did not exist before
-        // (which would break the LLVM concurrency model).
-
         // If the alignment of this instruction allows us to specify a more
         // restrictive (and performant) alignment and if we are sure this
         // instruction will be executed, update the alignment.
@@ -979,18 +995,25 @@
        if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0) {
          if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
-           GuaranteedToExecute = true;
+           PromotionIsLegal = true;
            Alignment = InstAlignment;
          }
-       } else if (!GuaranteedToExecute) {
-         GuaranteedToExecute =
+       } else if (!PromotionIsLegal) {
+         PromotionIsLegal =
              isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo);
        }
 
-        if (!GuaranteedToExecute && !CanSpeculateLoad) {
-          CanSpeculateLoad = isDereferenceableAndAlignedPointer(
-              Store->getPointerOperand(), Store->getAlignment(), MDL,
-              Preheader->getTerminator(), DT);
+        if (!PromotionIsLegal) {
+          if (!CanSpeculateLoad)
+            CanSpeculateLoad = isDereferenceableAndAlignedPointer(
+                Store->getPointerOperand(), Store->getAlignment(), MDL,
+                Preheader->getTerminator(), DT);
+          if (!StoreDominatesExits) {
+            StoreDominatesExits = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+              return DT->dominates(Store->getParent(), Exit);
+            });
+          }
         }
       } else
         return Changed; // Not a load or store.
@@ -1008,33 +1031,20 @@
   }
 
   // Check legality per comment above. Otherwise, we can't promote.
-  bool PromotionIsLegal = GuaranteedToExecute;
   if (!PromotionIsLegal && CanSpeculateLoad) {
+    PromotionIsLegal = StoreDominatesExits;
     // If this is a thread local location, then we can insert stores along
     // paths which originally didn't have them without violating the memory
     // model.
-    Value *Object = GetUnderlyingObject(SomePtr, MDL);
-    PromotionIsLegal =
-        isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true);
+    if (!PromotionIsLegal) {
+      Value *Object = GetUnderlyingObject(SomePtr, MDL);
+      PromotionIsLegal = isAllocLikeFn(Object, TLI) &&
+                         !PointerMayBeCaptured(Object, true, true);
+    }
   }
   if (!PromotionIsLegal)
     return Changed;
 
-  // Figure out the loop exits and their insertion points, if this is the
-  // first promotion.
-  if (ExitBlocks.empty()) {
-    CurLoop->getUniqueExitBlocks(ExitBlocks);
-    InsertPts.clear();
-    InsertPts.reserve(ExitBlocks.size());
-    for (BasicBlock *ExitBlock : ExitBlocks)
-      InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
-  }
-
-  // Can't insert into a catchswitch.
-  for (BasicBlock *ExitBlock : ExitBlocks)
-    if (isa<CatchSwitchInst>(ExitBlock->getTerminator()))
-      return Changed;
-
   // Otherwise, this is safe to promote, lets do it!
   DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
                << '\n');
Index: test/Transforms/LICM/scalar_promote.ll
===================================================================
--- test/Transforms/LICM/scalar_promote.ll
+++ test/Transforms/LICM/scalar_promote.ll
@@ -186,6 +186,188 @@
 ; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
 }
 
+declare i32 @opaque(i32)
+
+; We can promote even if opaque may throw.
+define i32 @test7() {
+; CHECK-LABEL: @test7(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %x = load i32, i32* %local
+  %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
+  store i32 %x2, i32* %local
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
+
+; Make sure we don't promote if the store is really control-flow dependent.
+define i32 @test7bad() {
+; CHECK-LABEL: @test7bad(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: br label %loop
+; CHECK: if:
+; CHECK-NEXT: store i32 %x2, i32* %local
+; CHECK-NEXT: br label %else
+; CHECK: exit:
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %else ]
+  %x = load i32, i32* %local
+  %x2 = call i32 @opaque(i32 %x) ; Note this does not capture %local
+  %cmp = icmp eq i32 %x2, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  store i32 %x2, i32* %local
+  br label %else
+
+else:
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
+
+; Even if neither the load nor the store is guaranteed to execute because
+; opaque() may throw, we can still promote - the load not being guaranteed
+; doesn't block us, because %local is always dereferenceable.
+define i32 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %throwaway = call i32 @opaque(i32 %j)
+  %x = load i32, i32* %local
+  %x2 = call i32 @opaque(i32 %x)
+  store i32 %x2, i32* %local
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
+
+; If the store is "guaranteed modulo exceptions", and the load depends on
+; control flow, we can only promote if the pointer is otherwise known to be
+; dereferenceable.
+define i32 @test9() {
+; CHECK-LABEL: @test9(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: load i32, i32* %local
+; CHECK-NEXT: br label %loop
+; CHECK: exit:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %else ]
+; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local
+; CHECK-NEXT: %ret = load i32, i32* %local
+; CHECK-NEXT: ret i32 %ret
entry:
+  %local = alloca i32
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %else ]
+  %j2 = call i32 @opaque(i32 %j)
+  %cmp = icmp eq i32 %j2, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %x = load i32, i32* %local
+  br label %else
+
+else:
+  %x2 = phi i32 [ 0, %loop ], [ %x, %if]
+  store i32 %x2, i32* %local
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %local
+  ret i32 %ret
+}
+
+; Same as test9, but %notderef is not known to be dereferenceable, so we
+; must not promote.
+define i32 @test9bad(i32 %i) {
+; CHECK-LABEL: @test9bad(
+; CHECK: entry:
+; CHECK-NEXT: %local = alloca
+; CHECK-NEXT: %notderef = getelementptr
+; CHECK-NEXT: br label %loop
+; CHECK: if:
+; CHECK-NEXT: load i32, i32* %notderef
+; CHECK-NEXT: br label %else
+; CHECK: exit:
+; CHECK-NEXT: %ret = load i32, i32* %notderef
+; CHECK-NEXT: ret i32 %ret
+entry:
+  %local = alloca i32
+  %notderef = getelementptr i32, i32* %local, i32 %i
+  br label %loop
+
+loop:
+  %j = phi i32 [ 0, %entry ], [ %next, %else ]
+  %j2 = call i32 @opaque(i32 %j)
+  %cmp = icmp eq i32 %j2, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %x = load i32, i32* %notderef
+  br label %else
+
+else:
+  %x2 = phi i32 [ 0, %loop ], [ %x, %if]
+  store i32 %x2, i32* %notderef
+  %next = add i32 %j, 1
+  %cond = icmp eq i32 %next, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ret = load i32, i32* %notderef
+  ret i32 %ret
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
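
Note (illustrative, not part of the patch): the HasCatchSwitch check, like the per-promotion check it replaces, exists because a loop exit block terminated by a catchswitch has no ordinary insertion point for a sunk store. A minimal IR sketch of that CFG shape, assuming an MSVC-style personality and a hypothetical @may_throw callee:

declare void @may_throw()
declare i32 @__CxxFrameHandler3(...)

define void @catchswitch_exit(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
entry:
  br label %loop

loop:                                   ; single-block loop
  store i32 0, i32* %p
  invoke void @may_throw()
          to label %loop unwind label %exit

; The exit block is an EH pad whose terminator is a catchswitch, so there is
; no usable first insertion point for a sunk store; with this patch, promotion
; is skipped up front for loops with such exits.
exit:
  %cs = catchswitch within none [label %handler] unwind to caller

handler:
  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
  catchret from %cp to label %done

done:
  ret void
}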