Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -1735,6 +1735,9 @@ // Clones of this instruction. Don't create more than one per exit block! SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; + AAMDNodes AAMetadata; + I.getAAMetadata(AAMetadata); + // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. @@ -1763,6 +1766,8 @@ // The PHI must be trivially replaceable. Instruction *New = sinkThroughTriviallyReplaceablePHI( PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); + New->setAAMetadata(AAMetadata); + New->setAAMetadataNoAliasProvenance(AAMetadata); PN->replaceAllUsesWith(New); eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr); Changed = true; @@ -1918,6 +1923,8 @@ NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); + // Note: ptr_provenance propagation is not done here. A dependent + // provenance should be migrated first! if (MSSAU) { MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; @@ -2084,7 +2091,9 @@ if (SomePtr->getType() != ASIV->getType()) return false; - for (User *U : ASIV->users()) { + for (auto U_it = ASIV->user_begin(), U_it_end = ASIV->user_end(); + U_it != U_it_end; ++U_it) { + User *U = *U_it; // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast<Instruction>(U); if (!UI || !CurLoop->contains(UI)) @@ -2093,6 +2102,9 @@ // If there is an non-load/store instruction in the loop, we can't promote // it. 
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) { + if (U_it.getUse().getOperandNo() == + Load->getNoaliasProvenanceOperandIndex()) + continue; if (!Load->isUnordered()) return false; @@ -2113,6 +2125,10 @@ Alignment = std::max(Alignment, InstAlignment); } } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) { + if (U_it.getUse().getOperandNo() == + Store->getNoaliasProvenanceOperandIndex()) + continue; + // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) @@ -2157,8 +2173,18 @@ Store->getPointerOperand(), Store->getValueOperand()->getType(), Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI); } - } else - return false; // Not a load or store. + } else { + // Not a load or store. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) { + if (II->getIntrinsicID() == Intrinsic::provenance_noalias || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // These noalias intrinsics must not block promotion. + continue; + } + } + return false; + } // Merge the AA tags. if (LoopUses.empty()) { @@ -2243,8 +2269,11 @@ PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DebugLoc()); - if (AATags) + if (AATags) { PreheaderLoad->setAAMetadata(AATags); + // Note: ptr_provenance propagation is not done here. A dependent provenance + // should be migrated first! 
+ } SSA.AddAvailableValue(Preheader, PreheaderLoad); if (MSSAU) { Index: llvm/test/Transforms/LICM/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/noalias.ll @@ -0,0 +1,144 @@ +; RUN: opt -S -basic-aa -licm -enable-mssa-loop-dependency=false %s | FileCheck -check-prefixes=CHECK,AST %s +; RUN: opt -S -basic-aa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,AST %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop-mssa(licm)' < %s -S | FileCheck -check-prefixes=CHECK,MSSA %s + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_p, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_p, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %dec = add nsw i32 %n.addr.0, -1 + %cmp = icmp ne i32 %dec, 0 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-LABEL: entry: +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; MSSA: store i32 42, i32* %_p, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-LABEL: do.body: +; CHECK-LABEL: do.end: +; AST: store i32 42, i32* %_p, align 4, !tbaa !9 +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void 
@test02(i32* %_p, i32 %n) #0 { +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 42, i32* %_p, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 + %dec = add nsw i32 %n.addr.0, -1 + %cmp = icmp ne i32 %dec, 0 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret void +} + +; CHECK-LABEL: @test02( +; CHECK-LABEL: entry: +; CHECK-LABEL: do.body: +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-LABEL: do.end: +; CHECK: store i32 42, i32* %_p, align 4, !tbaa !9 +; CHECK: ret void + +%struct.d = type { i8* } +%struct.f = type { i8* } + +; Function Attrs: nofree nounwind +define dso_local void @test03(%struct.d* nocapture readonly %h, %struct.f* %j) local_unnamed_addr addrspace(1) #0 !noalias !14 { +entry: + %e = getelementptr inbounds %struct.d, %struct.d* %h, i32 0, i32 0 + %0 = load i8*, i8** %e, align 4, !tbaa !17, !noalias !14 + %1 = tail call addrspace(1) i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %0, i8* null, i8** nonnull %e, i8** undef, i32 0, metadata !14), !tbaa !17, !noalias !14 + %e1 = getelementptr inbounds %struct.f, %struct.f* %j, i32 0, i32 0 + %e1.promoted = load i8*, i8** %e1, align 4, !tbaa !19, !noalias !14 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + %add.ptr.guard.guard.guard.lcssa = phi i8* [ %add.ptr.guard.guard.guard, %for.body ] + store i8* %add.ptr.guard.guard.guard.lcssa, i8** %e1, align 4, !tbaa !19, !noalias !14 + ret void + 
+for.body: ; preds = %entry, %for.body + %add.ptr.guard.guard.guard8 = phi i8* [ %e1.promoted, %entry ], [ %add.ptr.guard.guard.guard, %for.body ] + %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %2 = tail call addrspace(1) i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %add.ptr.guard.guard.guard8, i8* null, i8** nonnull %e1, i8** undef, i32 0, metadata !14), !tbaa !19, !noalias !14 + %.unpack.unpack = load i8, i8* %0, ptr_provenance i8* %1, align 1, !tbaa !21, !noalias !14 + store i8 %.unpack.unpack, i8* %add.ptr.guard.guard.guard8, ptr_provenance i8* %2, align 1, !tbaa !21, !noalias !14 + %add.ptr = getelementptr inbounds i8, i8* %add.ptr.guard.guard.guard8, i32 2 + %add.ptr.guard.guard.guard = tail call addrspace(1) i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* nonnull %add.ptr, i8* %2) + %inc = add nuw nsw i32 %i.07, 1 + %cmp = icmp ult i32 %i.07, 55 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: @test03 +; CHECK: entry: +; CHECK: @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %0, i8* null, i8** nonnull %e, i8** undef, i32 0, metadata !14), !tbaa !17, !noalias !14 +; CHECK: for.cond.cleanup: +; CHECK: @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %add.ptr.guard.guard.guard8.lcssa, i8* null, i8** nonnull %e1, i8** undef, i32 0, metadata !14), !tbaa !19, !noalias !14 +; CHECK: for.body: +; CHECK: @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %add.ptr.guard.guard.guard8, i8* null, i8** nonnull %e1, i8** undef, i32 0, metadata !14), !tbaa !19, !noalias !14 +; CHECK: } + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %0, i8* %1, i8** %2, i8** 
%3, i32 %4, metadata %5) addrspace(1) #2 + +; Function Attrs: nounwind readnone speculatable +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %0, i8* %1) addrspace(1) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: rp"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test02: rp"} +!13 = distinct !{!13, !"test02"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test03: unknown scope"} +!16 = distinct !{!16, !"test03"} +!17 = !{!18, !6, i64 0, i64 4} +!18 = !{!7, i64 4, !"d", !6, i64 0, i64 4} +!19 = !{!20, !6, i64 0, i64 4} +!20 = !{!7, i64 4, !"f", !6, i64 0, i64 4} +!21 = !{!22, !22, i64 0, i64 1} +!22 = !{!7, i64 1, !"b", !23, i64 0, i64 1} +!23 = !{!7, i64 1, !"a"}