diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1957,9 +1957,14 @@ // store is never executed, but the exit blocks are not executed either. bool DereferenceableInPH = false; - bool SafeToInsertStore = false; bool StoreIsGuanteedToExecute = false; bool FoundLoadToPromote = false; + // Goes from Unknown to either Safe or Unsafe, but can't switch between them. + enum { + StoreSafe, + StoreUnsafe, + StoreSafetyUnknown, + } StoreSafety = StoreSafetyUnknown; SmallVector LoopUses; @@ -1981,7 +1986,7 @@ // after return and thus can't possibly load from the object. Value *Object = getUnderlyingObject(SomePtr); if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT)) - return false; + StoreSafety = StoreUnsafe; } // Check that all accesses to pointers in the alias set use the same type. @@ -2040,7 +2045,8 @@ StoreIsGuanteedToExecute |= GuaranteedToExecute; if (GuaranteedToExecute) { DereferenceableInPH = true; - SafeToInsertStore = true; + if (StoreSafety == StoreSafetyUnknown) + StoreSafety = StoreSafe; Alignment = std::max(Alignment, InstAlignment); } @@ -2050,10 +2056,11 @@ // introducing stores on paths that did not have them. // Note that this only looks at explicit exit blocks. If we ever // start sinking stores into unwind edges (see above), this will break. - if (!SafeToInsertStore) - SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { - return DT->dominates(Store->getParent(), Exit); - }); + if (StoreSafety == StoreSafetyUnknown && + llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { + return DT->dominates(Store->getParent(), Exit); + })) + StoreSafety = StoreSafe; // If the store is not guaranteed to execute, we may still get // deref info through it. @@ -2105,22 +2112,22 @@ // Check whether the location is thread-local. If it is, then we can insert // stores along paths which originally didn't have them without violating the // memory model. - if (!SafeToInsertStore) { + if (StoreSafety == StoreSafetyUnknown) { Value *Object = getUnderlyingObject(SomePtr); - SafeToInsertStore = - (isNoAliasCall(Object) || isa(Object) || + if ((isNoAliasCall(Object) || isa(Object) || (isa(Object) && cast(Object)->hasByValAttr())) && - isNotCapturedBeforeOrInLoop(Object, CurLoop, DT); + isNotCapturedBeforeOrInLoop(Object, CurLoop, DT)) + StoreSafety = StoreSafe; } // If we've still failed to prove we can sink the store, hoist the load // only, if possible. - if (!SafeToInsertStore && !FoundLoadToPromote) + if (StoreSafety != StoreSafe && !FoundLoadToPromote) // If we cannot hoist the load either, give up. return false; // Lets do the promotion! - if (SafeToInsertStore) + if (StoreSafety == StoreSafe) LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr << '\n'); else @@ -2146,7 +2153,7 @@ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, - SafeToInsertStore); + StoreSafety == StoreSafe); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. diff --git a/llvm/test/Transforms/LICM/guards.ll b/llvm/test/Transforms/LICM/guards.ll --- a/llvm/test/Transforms/LICM/guards.ll +++ b/llvm/test/Transforms/LICM/guards.ll @@ -109,17 +109,18 @@ br label %loop } -; Hoist guard. Cannot hoist load because of aliasing. +; Hoist guard. Cannot hoist load because of aliasing, but can promote. define void @test3(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ] +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[ENTRY:%.*]] ], [ 0, [[LOOP]] ] +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[X_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 -; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[TMP0]] ; CHECK-NEXT: br label [[LOOP]] ; diff --git a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll --- a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll +++ b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll @@ -10,11 +10,12 @@ define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1 ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: @@ -150,11 +151,12 @@ define void @test_sret(i32* noalias sret(i32) %a, i1 zeroext %y) uwtable { ; CHECK-LABEL: @test_sret( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1 ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 ; CHECK-NEXT: br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: @@ -466,17 +468,18 @@ ret void } -; The malloc'ed memory can be captured and therefore not promoted. +; The malloc'ed memory can be captured and therefore only loads can be promoted. define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @malloc_capture( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_LATCH:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[ADD1:%.*]] = phi i32 [ [[DOTPROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_LATCH:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_LATCH]] ] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD1]], 1 ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 ; CHECK-NEXT: br label [[FOR_CALL:%.*]] ; CHECK: for.call: @@ -492,15 +495,15 @@ ; CHECK: for.end: ; CHECK-NEXT: br label [[FUN_RET:%.*]] ; CHECK: lpad: -; CHECK-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1 ; CHECK-NEXT: br label [[CATCH:%.*]] ; CHECK: catch: -; CHECK-NEXT: [[TMP5:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP3]]) -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8* -; CHECK-NEXT: call void @free(i8* [[TMP6]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* +; CHECK-NEXT: call void @free(i8* [[TMP5]]) ; CHECK-NEXT: call void @__cxa_end_catch() ; CHECK-NEXT: br label [[FUN_RET]] ; CHECK: fun.ret: