Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -731,7 +731,11 @@
       // time in isGuaranteedToExecute if we don't actually have anything to
       // drop. It is a compile time optimization, not required for correctness.
       !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
-    I.dropUnknownNonDebugMetadata();
+    // Pass MD_invariant_load as a known ID so that !invariant.load is retained
+    // on the hoisted instruction; this lets a load hoisted out of an inner
+    // loop be hoisted again out of the enclosing loop.
+    I.dropUnknownNonDebugMetadata(
+        LLVMContext::MD_invariant_load);
 
   // Move the new node to the Preheader, before its terminator.
   I.moveBefore(Preheader->getTerminator());
Index: test/Transforms/LICM/hoist-invariant-load-twice.ll
===================================================================
--- /dev/null
+++ test/Transforms/LICM/hoist-invariant-load-twice.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+; Verify that a load tagged !invariant.load inside a doubly nested loop is
+; hoisted out of both loops: it must end up in the entry block and must still
+; carry its !invariant.load metadata there.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @work(i32, i32, i32)
+
+define void @loop(i32* dereferenceable(4) %ptr) {
+; CHECK: entry:
+; CHECK-NEXT: %val = load i32, i32* %ptr, !invariant.load !0
+; CHECK-NEXT: br label %loop1
+entry:
+  br label %loop1
+
+loop1:
+  %ind1 = phi i32 [ 0, %entry ], [ %indnext1, %loop2 ]
+  %cont1 = icmp eq i32 %ind1, 10
+  br i1 %cont1, label %loop2, label %exit
+
+loop2:
+  %ind2 = phi i32 [ 0, %loop1 ], [ %indnext2, %work ]
+  %indnext1 = add i32 %ind1, 1
+  %cont2 = icmp eq i32 %ind2, 15
+  br i1 %cont2, label %work, label %loop1
+
+work:
+  %indnext2 = add i32 %ind2, 1
+  %val = load i32, i32* %ptr, !invariant.load !0
+  call void @work(i32 %ind1, i32 %ind2, i32 %val)
+  br label %loop2
+
+exit:
+  ret void
+}
+
+!0 = !{}
+!1 = !{i64 4}