Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -434,7 +434,6 @@
       // Try hoisting the instruction out to the preheader. We can only do this
       // if all of the operands of the instruction are loop invariant and if it
       // is safe to hoist the instruction.
-      //
       if (CurLoop->hasLoopInvariantOperands(&I) &&
           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
           isSafeToExecuteUnconditionally(
@@ -486,6 +485,26 @@
         SafetyInfo->BlockColors = colorEHFunclets(*Fn);
 }
 
+/// Return true if \p LI has !invariant.group metadata and its pointer operand
+/// is either not an Instruction or is defined in a block that properly
+/// dominates the header of \p CurLoop.
+///
+/// Returns false for any load without !invariant.group metadata.
+static bool isLoadInvariantGroupInLoop(LoadInst *LI, DominatorTree *DT,
+                                       Loop *CurLoop) {
+  if (!LI->getMetadata(LLVMContext::MD_invariant_group))
+    return false;
+
+  // TODO can I do this without casting to Instruction?
+  if (auto *PointerOperandInst = dyn_cast<Instruction>(LI->getPointerOperand()))
+    return DT->properlyDominates(PointerOperandInst->getParent(),
+                                 CurLoop->getHeader());
+  // A pointer operand that is not an instruction has no defining block to
+  // check, so it is treated as always dominating the loop.
+  // TODO check if it actually happens.
+  return true;
+}
+
 // Return true if LI is invariant within scope of the loop. LI is invariant if
 // CurLoop is dominated by an invariant.start representing the same memory location
 // and size as the memory location LI loads from, and also the invariant.start
@@ -555,6 +574,9 @@
     if (LI->getMetadata(LLVMContext::MD_invariant_load))
       return true;
 
+    if (isLoadInvariantGroupInLoop(LI, DT, CurLoop))
+      return true;
+
     // This checks for an invariant.start dominating the load.
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
@@ -859,7 +881,8 @@
       // time in isGuaranteedToExecute if we don't actually have anything to
       // drop. It is a compile time optimization, not required for correctness.
       !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
-    I.dropUnknownNonDebugMetadata();
+    I.dropUnknownNonDebugMetadata({LLVMContext::MD_invariant_group,
+                                   LLVMContext::MD_invariant_load});
 
   // Move the new node to the Preheader, before its terminator.
   I.moveBefore(Preheader->getTerminator());
Index: test/Transforms/LICM/hoist-invariant-group-load.ll
===================================================================
--- /dev/null
+++ test/Transforms/LICM/hoist-invariant-group-load.ll
@@ -0,0 +1,102 @@
+; RUN: opt -licm -disable-basicaa -S < %s | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+
+; CHECK-LABEL: define void @hoist(
+define void @hoist(%struct.A* dereferenceable(8)) {
+entry:
+  %call1 = tail call i32 @bar()
+  %tobool2 = icmp eq i32 %call1, 0
+  br i1 %tobool2, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  %b = bitcast %struct.A* %0 to void (%struct.A*)***
+; CHECK-NOT: load {{.*}} !dereferenceable
+; CHECK: %vtable = load {{.*}} %b, align 8, !invariant.group
+
+; CHECK-NEXT: br label %while.body
+  br label %while.body
+; CHECK-NOT: load
+while.body:                                       ; preds = %while.body.lr.ph, %while.body
+  %vtable = load void (%struct.A*)**, void (%struct.A*)*** %b, align 8, !dereferenceable !1, !invariant.group !0
+; Can't hoist because hoisting of %vtable discards dereferenceable metadata
+; CHECK: %1 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 8, !invariant.load
+  %1 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 8, !invariant.load !0
+  tail call void %1(%struct.A* %0)
+  %call = tail call i32 @bar()
+  %tobool = icmp eq i32 %call, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  ret void
+}
+
+; CHECK-LABEL: define void @hoist2(
+define void @hoist2(i8** dereferenceable(8)) {
+entry:
+  %call1 = tail call i32 @bar()
+  %tobool2 = icmp eq i32 %call1, 0
+  br i1 %tobool2, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+; CHECK: %x = load {{.*}} %0, align 8, !invariant.group
+; CHECK: br label %while.body
+  br label %while.body
+; CHECK-NOT: load
+while.body:                                       ; preds = %while.body.lr.ph, %while.body
+  %x = load i8*, i8** %0, align 8, !invariant.group !0
+  call void @foo(i8* %x)
+
+  %call = tail call i32 @bar()
+  %tobool = icmp eq i32 %call, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:
+  ret void
+}
+
+declare void @foo(i8*)
+declare i32 @bar()
+
+; CHECK-LABEL: define void @dontHoist
+define void @dontHoist(%struct.A** %a) {
+entry:
+  %call4 = tail call i32 @bar()
+  %cmp5 = icmp sgt i32 %call4, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+; CHECK: for.body:
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; CHECK: load {{.*}} !invariant.group
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds %struct.A*, %struct.A** %a, i64 %indvars.iv
+  %0 = load %struct.A*, %struct.A** %arrayidx, align 8
+  %1 = bitcast %struct.A* %0 to void (%struct.A*)***
+  %vtable = load void (%struct.A*)**, void (%struct.A*)*** %1, align 8, !dereferenceable !1, !invariant.group !0
+  %2 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 8, !invariant.load !0
+  tail call void %2(%struct.A* %0)
+  %indvars.iv.next = add nuw i64 %indvars.iv, 1
+  %call = tail call i32 @bar()
+  %3 = sext i32 %call to i64
+  %cmp = icmp slt i64 %indvars.iv.next, %3
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+!0 = !{}
+!1 = !{i64 8}
\ No newline at end of file