Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp @@ -577,10 +577,13 @@ Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { + // SafetyInfo is nullptr if we are checking for sinking from preheader to + // loop body. + const bool SinkingToLoopBody = !SafetyInfo; // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { if (!LI->isUnordered()) - return false; // Don't hoist volatile/atomic loads! + return false; // Don't sink/hoist volatile or ordered atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. @@ -589,6 +592,9 @@ if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; + if (LI->isAtomic() && SinkingToLoopBody) + return false; // Don't sink unordered atomic loads to loop body. + // This checks for an invariant.start dominating the load. if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; @@ -664,9 +670,9 @@ !isa(I)) return false; - // SafetyInfo is nullptr if we are checking for sinking from preheader to - // loop body. It will be always safe as there is no speculative execution. - if (!SafetyInfo) + // If we are checking for sinking from preheader to loop body it will be + // always safe as there is no speculative execution. + if (SinkingToLoopBody) return true; // TODO: Plumb the context instruction through to make hoisting and sinking Index: llvm/trunk/test/Transforms/LICM/loopsink.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/loopsink.ll +++ llvm/trunk/test/Transforms/LICM/loopsink.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s -; RUN: opt -S -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s @g = global i32 0, align 4 @@ -280,6 +280,99 @@ ret i32 10 } +; b1 +; / \ +; b2 b3 +; \ / +; b4 +; preheader: 1000 +; b2: 15 +; b3: 7 +; Do not sink unordered atomic load to b2 +; CHECK: t6 +; CHECK: .preheader: +; CHECK: load atomic i32, i32* @g unordered, align 4 +; CHECK: .b2: +; CHECK-NOT: load atomic i32, i32* @g unordered, align 4 +define i32 @t6(i32, i32) #0 !prof !0 { + %3 = icmp eq i32 %1, 0 + br i1 %3, label %.exit, label %.preheader + +.preheader: + %invariant = load atomic i32, i32* @g unordered, align 4 + br label %.b1 + +.b1: + %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ] + %c1 = icmp sgt i32 %iv, %0 + br i1 %c1, label %.b2, label %.b3, !prof !1 + +.b2: + %t1 = add nsw i32 %invariant, %iv + br label %.b4 + +.b3: + %t2 = add nsw i32 %iv, 100 + br label %.b4 + +.b4: + %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ] + %t3 = add nuw nsw i32 %iv, 1 + %c2 = icmp eq i32 %t3, %p1 + br i1 %c2, label %.b1, label %.exit, !prof !3 + +.exit: + ret i32 10 +} + +@g_const = constant i32 0, align 4 + +; b1 +; / \ +; b2 b3 +; \ / +; b4 +; preheader: 1000 +; b2: 0.5 +; b3: 999.5 +; Sink unordered atomic load to b2. It is allowed to sink into loop unordered +; load from constant. +; CHECK: t7 +; CHECK: .preheader: +; CHECK-NOT: load atomic i32, i32* @g_const unordered, align 4 +; CHECK: .b2: +; CHECK: load atomic i32, i32* @g_const unordered, align 4 +define i32 @t7(i32, i32) #0 !prof !0 { + %3 = icmp eq i32 %1, 0 + br i1 %3, label %.exit, label %.preheader + +.preheader: + %invariant = load atomic i32, i32* @g_const unordered, align 4 + br label %.b1 + +.b1: + %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ] + %c1 = icmp sgt i32 %iv, %0 + br i1 %c1, label %.b2, label %.b3, !prof !1 + +.b2: + %t1 = add nsw i32 %invariant, %iv + br label %.b4 + +.b3: + %t2 = add nsw i32 %iv, 100 + br label %.b4 + +.b4: + %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ] + %t3 = add nuw nsw i32 %iv, 1 + %c2 = icmp eq i32 %t3, %p1 + br i1 %c2, label %.b1, label %.exit, !prof !3 + +.exit: + ret i32 10 +} + declare i32 @foo() !0 = !{!"function_entry_count", i64 1}