diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -31,6 +31,7 @@
 class IRBuilderBase;
 class Loop;
 class LoopInfo;
+class LoopSafetyInfo;
 class MemoryAccess;
 class MemorySSA;
 class MemorySSAUpdater;
@@ -353,6 +354,7 @@
 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                         Loop *CurLoop, AliasSetTracker *CurAST,
                         MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
+                        const LoopSafetyInfo *SafetyInfo,
                         SinkAndHoistLICMFlags *LICMFlags = nullptr,
                         OptimizationRemarkEmitter *ORE = nullptr);
 
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -578,8 +578,8 @@
       bool FreeInLoop = false;
       if (!I.mayHaveSideEffects() &&
           isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
-                             ORE)) {
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+                             SafetyInfo, &Flags, ORE)) {
         if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
           if (!FreeInLoop) {
             ++II;
@@ -918,8 +918,8 @@
       // and we have accurately duplicated the control flow from the loop header
       // to that block.
       if (CurLoop->hasLoopInvariantOperands(&I) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
-                             ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+                             SafetyInfo, &Flags, ORE) &&
           worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
           isSafeToExecuteUnconditionally(
               I, DT, TLI, CurLoop, SafetyInfo, ORE,
@@ -1166,6 +1166,7 @@
                               Loop *CurLoop, AliasSetTracker *CurAST,
                               MemorySSAUpdater *MSSAU,
                               bool TargetExecutesOncePerLoop,
+                              const LoopSafetyInfo *SafetyInfo,
                               SinkAndHoistLICMFlags *Flags,
                               OptimizationRemarkEmitter *ORE) {
   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
@@ -1194,6 +1195,17 @@
     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
       return false; // Don't risk duplicating unordered loads
 
+    // Loads/stores with invariant.group metadata are ok to hoist/sink.
+    // However, preserving the metadata is more important than hoisting. So only
+    // hoist/sink when we can preserve the metadata, which is when the
+    // instruction is guaranteed to execute in the loop.
+    if (SafetyInfo) {
+      if (LI->hasMetadata(LLVMContext::MD_invariant_group) &&
+          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) {
+        return true;
+      }
+    }
+
     // This checks for an invariant.start dominating the load.
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -329,7 +329,7 @@
     assert(L.hasLoopInvariantOperands(I) &&
            "Insts in a loop's preheader should have loop invariant operands!");
     if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
-                            LICMFlags.get()))
+                            nullptr, LICMFlags.get()))
       continue;
     if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
                         MSSAU.get()))
diff --git a/llvm/test/Transforms/LICM/invariant.group.ll b/llvm/test/Transforms/LICM/invariant.group.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LICM/invariant.group.ll
@@ -0,0 +1,94 @@
+; RUN: opt -S < %s -passes=licm | FileCheck %s
+
+declare i8* @llvm.launder.invariant.group.p0i8(i8* %a)
+
+; CHECK-LABEL: define{{.*}}@f
+define void @f(i32* %x) {
+; CHECK: entry:
+; CHECK-NOT: {{.*}}:
+; CHECK: load {{.*}} !invariant.group
+entry:
+  %x_i8 = bitcast i32* %x to i8*
+  %x_i8_inv = call i8* @llvm.launder.invariant.group.p0i8(i8* %x_i8)
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+
+  %x_inv = bitcast i8* %x_i8_inv to i32*
+  %0 = load i32, i32* %x_inv, !invariant.group !0
+
+  call void @a(i32 %0)
+  %inc = add nuw nsw i32 %i, 1
+  %exitcond.not = icmp eq i32 %inc, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare i1 @p(i32)
+
+; CHECK-LABEL: define{{.*}}@f2
+; load is not unconditionally run within loop, so do not hoist in order to preserve metadata
+define void @f2(i32* dereferenceable(4) align(4) %x, i1 %b) {
+; CHECK: for.body.load:
+; CHECK-NOT: {{.*}}:
+; CHECK: load {{.*}} !invariant.group
+entry:
+  %x_i8 = bitcast i32* %x to i8*
+  %x_i8_inv = call i8* @llvm.launder.invariant.group.p0i8(i8* %x_i8)
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body.end ]
+
+  %p = call i1 @p(i32 %i)
+  br i1 %p, label %for.body.load, label %for.body.end
+
+for.body.load:
+  %x_inv = bitcast i8* %x_i8_inv to i32*
+  %0 = load i32, i32* %x_inv, !invariant.group !0
+
+  call void @a(i32 %0)
+
+  br label %for.body.end
+
+for.body.end:
+  %inc = add nuw nsw i32 %i, 1
+  %exitcond.not = icmp eq i32 %inc, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: define{{.*}}@g
+define void @g(i32* %x) {
+; CHECK: for.body:
+; CHECK-NOT: {{.*}}:
+; CHECK: load {{.*}} !invariant.group
+entry:
+  %x_i8 = bitcast i32* %x to i8*
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+
+  %x_i8_inv = call i8* @llvm.launder.invariant.group.p0i8(i8* %x_i8)
+  %x_inv = bitcast i8* %x_i8_inv to i32*
+
+  %0 = load i32, i32* %x_inv, !invariant.group !0
+
+  call void @a(i32 %0)
+  %inc = add nuw nsw i32 %i, 1
+  %exitcond.not = icmp eq i32 %inc, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare void @a(i32)
+
+!0 = !{}