diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1176,8 +1176,34 @@
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
 
+    auto *MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));
+
+    // An invariant.group load always yields the same value, so when hoisting
+    // it suffices to show that nothing clobbers the loaded pointer between
+    // the start of the loop and the load itself.
+    if (LI->hasMetadata(LLVMContext::MD_invariant_group) &&
+        !Flags.getIsSink()) {
+      MemoryAccess *Source;
+      // See declaration of SetLicmMssaOptCap for usage details.
+      if (Flags.tooManyClobberingCalls())
+        Source = MU->getDefiningAccess();
+      else {
+        Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+        Flags.incrementClobberingCalls();
+      }
+
+      // Hoisting is safe when the clobbering access is either:
+      //  1) outside the loop (this includes the live-on-entry def), or
+      //  2) the MemoryPhi in the loop header, in which case no access in the
+      //     loop clobbers the pointer before the load is reached.
+      return MSSA->isLiveOnEntryDef(Source) ||
+             !CurLoop->contains(Source->getBlock()) ||
+             (Source->getBlock() == CurLoop->getHeader() &&
+              isa<MemoryPhi>(Source));
+    }
+
     bool Invalidated = pointerInvalidatedByLoop(
-        MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
+        MSSA, MU, CurLoop, I, Flags);
     // Check loop-invariant address because this may also be a sinkable load
     // whose address is not necessarily loop-invariant.
     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
diff --git a/llvm/test/Transforms/LICM/invariant.group.ll b/llvm/test/Transforms/LICM/invariant.group.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LICM/invariant.group.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=licm < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop-mssa(licm)' < %s -S | FileCheck %s
+
+; The invariant.group load has no clobber before it in the loop: it is hoisted.
+define void @test(ptr %arg, ptr %arg1) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  bb2:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARG1:%.*]], align 4, !invariant.group !0
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i64 [ 0, [[BB2:%.*]] ], [ [[TMP10:%.*]], [[BB5]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP10]] = add nuw nsw i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[TMP10]], 200
+; CHECK-NEXT:    br i1 [[TMP11]], label [[BB12:%.*]], label [[BB5]]
+; CHECK:       bb12:
+; CHECK-NEXT:    ret void
+;
+bb2:
+  br label %bb5
+
+bb5:                                              ; preds = %bb5, %bb2
+  %tmp6 = phi i64 [ 0, %bb2 ], [ %tmp10, %bb5 ]
+  %tmp3 = load i32, ptr %arg1, align 4, !invariant.group !0
+  %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp6
+  store i32 %tmp3, ptr %tmp7, align 8
+  %tmp10 = add nuw nsw i64 %tmp6, 1
+  %tmp11 = icmp eq i64 %tmp10, 200
+  br i1 %tmp11, label %bb12, label %bb5
+
+bb12:                                             ; preds = %bb5
+  ret void
+}
+
+; A store to the loaded pointer precedes the load in the loop: it is not hoisted.
+define void @test_fail(ptr %arg, ptr %arg1) {
+; CHECK-LABEL: @test_fail(
+; CHECK-NEXT:  bb2:
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i64 [ 0, [[BB2:%.*]] ], [ [[TMP10:%.*]], [[BB5]] ]
+; CHECK-NEXT:    store i32 3, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARG1]], align 4, !invariant.group !0
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP10]] = add nuw nsw i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[TMP10]], 200
+; CHECK-NEXT:    br i1 [[TMP11]], label [[BB12:%.*]], label [[BB5]]
+; CHECK:       bb12:
+; CHECK-NEXT:    ret void
+;
+bb2:
+  br label %bb5
+
+bb5:                                              ; preds = %bb5, %bb2
+  %tmp6 = phi i64 [ 0, %bb2 ], [ %tmp10, %bb5 ]
+  store i32 3, ptr %arg1
+  %tmp3 = load i32, ptr %arg1, align 4, !invariant.group !0
+  %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp6
+  store i32 %tmp3, ptr %tmp7, align 8
+  %tmp10 = add nuw nsw i64 %tmp6, 1
+  %tmp11 = icmp eq i64 %tmp10, 200
+  br i1 %tmp11, label %bb12, label %bb5
+
+bb12:                                             ; preds = %bb5
+  ret void
+}
+
+!0 = !{}