Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -388,13 +388,16 @@ /// /// Metadata not listed as known via KnownIDs is removed void combineMetadata(Instruction *K, const Instruction *J, - ArrayRef KnownIDs, bool DoesKMove = false); + ArrayRef KnownIDs, bool DoesKMove); /// Combine the metadata of two instructions so that K can replace J. This -/// specifically handles the case of CSE-like transformations. +/// specifically handles the case of CSE-like transformations. Some +/// metadata can only be kept if K dominates J. For this to be correct, +/// K cannot be hoisted. /// /// Unknown metadata is removed. -void combineMetadataForCSE(Instruction *K, const Instruction *J); +void combineMetadataForCSE(Instruction *K, const Instruction *J, + bool DoesKMove); /// Patch the replacement so that it is not more restrictive than the value /// being replaced. Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1024,7 +1024,7 @@ if (Value *AvailableVal = FindAvailableLoadedValue( &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) { if (IsLoadCSE) - combineMetadataForCSE(cast(AvailableVal), &LI); + combineMetadataForCSE(cast(AvailableVal), &LI, true); return replaceInstUsesWith( LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(), Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -616,7 +616,7 @@ // Add all operands to the new PHI and combine TBAA metadata. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = cast(PN.getIncomingValue(i)); - combineMetadata(NewLI, LI, KnownIDs); + combineMetadata(NewLI, LI, KnownIDs, false); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; Index: lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- lib/Transforms/Scalar/GVNHoist.cpp +++ lib/Transforms/Scalar/GVNHoist.cpp @@ -247,7 +247,7 @@ LLVMContext::MD_noalias, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group}; - combineMetadata(ReplInst, I, KnownIDs); + combineMetadata(ReplInst, I, KnownIDs, false); } // This pass hoists common computations across branches sharing common Index: lib/Transforms/Scalar/GVNSink.cpp =================================================================== --- lib/Transforms/Scalar/GVNSink.cpp +++ lib/Transforms/Scalar/GVNSink.cpp @@ -859,7 +859,7 @@ // Update metadata and IR flags. for (auto *I : Insts) if (I != I0) { - combineMetadataForCSE(I0, I); + combineMetadataForCSE(I0, I, false); I0->andIRFlags(I); } Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -1296,7 +1296,7 @@ if (IsLoadCSE) { LoadInst *NLoadI = cast(AvailableVal); - combineMetadataForCSE(NLoadI, LoadI); + combineMetadataForCSE(NLoadI, LoadI, true); }; // If the returned value is the load itself, replace with an undef. This can @@ -1486,7 +1486,7 @@ } for (LoadInst *PredLoadI : CSELoads) { - combineMetadataForCSE(PredLoadI, LoadI); + combineMetadataForCSE(PredLoadI, LoadI, false); } LoadI->replaceAllUsesWith(PN); Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -994,7 +994,7 @@ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_invariant_group}; - combineMetadata(C, cpy, KnownIDs); + combineMetadata(C, cpy, KnownIDs, false); // Remove the memcpy. MD->removeInstruction(cpy); Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -2318,7 +2318,8 @@ K->setMetadata(LLVMContext::MD_invariant_group, JMD); } -void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { +void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, + bool KDominatesJ) { unsigned KnownIDs[] = { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, LLVMContext::MD_range, @@ -2326,7 +2327,7 @@ LLVMContext::MD_invariant_group, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null}; - combineMetadata(K, J, KnownIDs); + combineMetadata(K, J, KnownIDs, KDominatesJ); } void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -1316,7 +1316,7 @@ LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access}; - combineMetadata(I1, I2, KnownIDs); + combineMetadata(I1, I2, KnownIDs, false); // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. @@ -1582,7 +1582,7 @@ // However, as N-way merge for CallInst is rare, so we use simplified API // instead of using complex API for N-way merge. I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); - combineMetadataForCSE(I0, I); + combineMetadataForCSE(I0, I, false); I0->andIRFlags(I); } Index: test/Transforms/GVNHoist/hoist-md.ll =================================================================== --- test/Transforms/GVNHoist/hoist-md.ll +++ test/Transforms/GVNHoist/hoist-md.ll @@ -93,6 +93,29 @@ ; CHECK: %[[phi:.*]] = phi i32 [ %[[load]], %{{.*}} ], [ %[[load]], %{{.*}} ] ; CHECK: ret i32 %[[phi]] +define i32* @test5(i1 %b, i32** %y) { +entry: + br i1 %b, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load i32*, i32** %y, align 4, !nonnull !9 + br label %return + +if.end: ; preds = %entry + %1 = load i32*, i32** %y, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32* [ %0, %if.then ], [ %1, %if.end ] + ret i32* %retval.0 +} +; CHECK-LABEL: define i32* @test5( +; CHECK: %[[load:.*]] = load i32*, i32** %y, align 4 +; CHECK-NOT: !nonnull +; CHECK: %[[phi:.*]] = phi i32* [ %[[load]], %{{.*}} ], [ %[[load]], %{{.*}} ] +; CHECK: ret i32* %[[phi]] + !7 = !{i32 0, i32 2} !8 = !{i32 3, i32 4} +!9 = !{} ; CHECK: ![[range_md]] = !{i32 0, i32 2, i32 3, i32 4} Index: test/Transforms/GVNSink/sink-combine-metadata.ll =================================================================== --- /dev/null +++ test/Transforms/GVNSink/sink-combine-metadata.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -gvn-sink -S | FileCheck %s + +; Check that nonnull metadata for non-dominating loads is not propagated. +; CHECK-LABEL: @test1( +; CHECK-LABEL: if.end: +; CHECK: %[[ptr:.*]] = phi i32** +; CHECK: %[[load:.*]] = load i32*, i32** %[[ptr]] +; CHECK-NOT: !nonnull +; CHECK: ret i32* %[[load]] +define i32* @test1(i1 zeroext %flag, i32*** %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = load i32**, i32*** %p + %aa = load i32*, i32** %a, !nonnull !0 + br label %if.end + +if.else: + %b = load i32**, i32*** %p + %bb= load i32*, i32** %b + br label %if.end + +if.end: + %c = phi i32* [ %aa, %if.then ], [ %bb, %if.else ] + ret i32* %c +} + +; CHECK-LABEL: @test2( +; CHECK-LABEL: if.end: +; CHECK: %[[ptr:.*]] = phi i32** +; CHECK: %[[load:.*]] = load i32*, i32** %[[ptr]] +; CHECK-NOT: !nonnull +; CHECK: ret i32* %[[load]] +define i32* @test2(i1 zeroext %flag, i32*** %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = load i32**, i32*** %p + %aa = load i32*, i32** %a + br label %if.end + +if.else: + %b = load i32**, i32*** %p + %bb= load i32*, i32** %b, !nonnull !0 + br label %if.end + +if.end: + %c = phi i32* [ %aa, %if.then ], [ %bb, %if.else ] + ret i32* %c +} + + +!0 = !{} Index: test/Transforms/InstCombine/load-combine-metadata-dominance.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/load-combine-metadata-dominance.ll @@ -0,0 +1,44 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Check that nonnull metadata is propagated from dominating load. +; CHECK-LABEL: @combine_metadata_dominance1( +; CHECK-LABEL: bb1: +; CHECK: load i32*, i32** %p, align 8, !nonnull !0 +; CHECK-NOT: load i32*, i32** %p +define void @combine_metadata_dominance1(i32** %p) { +entry: + %a = load i32*, i32** %p, !nonnull !0 + br label %bb1 + +bb1: + %b = load i32*, i32** %p + store i32 0, i32* %a + store i32 0, i32* %b + ret void +} + +declare i32 @use(i32*, i32) readonly + +; Check that nonnull from the dominated load does not get propagated. +; There are some cases where it would be safe to keep it. +; CHECK-LABEL: @combine_metadata_dominance2( +; CHECK-NOT: nonnull +define void @combine_metadata_dominance2(i32** %p) { +entry: + %a = load i32*, i32** %p + br i1 undef, label %bb1, label %bb2 + +bb1: + %b = load i32*, i32** %p, !nonnull !0 + store i32 0, i32* %a + store i32 0, i32* %b + ret void + +bb2: + ret void +} + + +!0 = !{} Index: test/Transforms/InstCombine/phi-load-metadata-dominance.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/phi-load-metadata-dominance.ll @@ -0,0 +1,26 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +declare void @bar() +declare void @baz() + +; Check that nonnull metadata is from non-dominating loads is not propagated. +; CHECK-LABEL: cont: +; CHECK-NOT: !nonnull +define i32* @test_combine_metadata_dominance(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) { + br i1 %c, label %t, label %f +t: + call void @bar() + %v1 = load i32*, i32** %p1, align 8, !nonnull !0 + br label %cont + +f: + call void @baz() + %v2 = load i32*, i32** %p2, align 8 + br label %cont + +cont: + %res = phi i32* [ %v1, %t ], [ %v2, %f ] + ret i32* %res +} + +!0 = !{} Index: test/Transforms/JumpThreading/combine-metadata.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/combine-metadata.ll @@ -0,0 +1,122 @@ +; RUN: opt < %s -jump-threading -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +declare void @use(i32 *) + +; Check that we propagate nonnull to dominated loads, when we find an available +; loaded value. +; CHECK-LABEL: @test1( +; CHECK-LABEL: ret1: +; CHECK-NEXT: %[[p1:.*]] = load i32*, i32** %ptr +; CHECK-NOT: !nonnull +; CHECK-NEXT: store i32 1, i32* %[[p1]] +; CHECK-NEXT: tail call void @use(i32* null) +; CHECK-NEXT: ret void + +; CHECK-LABEL: ret2: +; CHECK-NEXT: %[[p2:.*]] = load i32*, i32** %ptr, !nonnull !0 +; CHECK: tail call void @use(i32* %[[p2]]) +; CHECK-NEXT: ret void +define void @test1(i32** %ptr, i1 %c) { + br i1 %c, label %d1, label %d2 + +d1: + %p1 = load i32*, i32** %ptr, !nonnull !0 + br label %d3 + +d2: + br label %d3 + +d3: + %pm = phi i32* [ null, %d2 ], [ %p1, %d1 ] + %p2 = load i32*, i32** %ptr + store i32 1, i32* %p2 + %c2 = icmp eq i32* %pm, null + br i1 %c2, label %ret1, label %ret2 + +ret1: + tail call void @use(i32* %pm) nounwind + ret void + +ret2: + tail call void @use(i32* %pm) nounwind + ret void +} + +; Check that we propagate nonnull to dominated loads, when we find an available +; loaded value. +; CHECK-LABEL: @test2( +; CHECK-LABEL: d3.thread: +; CHECK-NEXT: %[[p1:.*]] = load i32*, i32** %ptr, !nonnull !0 +; CHECK-NEXT: store i32 1, i32* %[[p1]] +; CHECK-NEXT: br label %ret1 + +; CHECK-LABEL: d3: +; CHECK-NEXT: %[[p_cmp:.*]] = load i32*, i32** %ptr +; CHECK-NEXT: %[[p2:.*]] = load i32*, i32** %ptr, !nonnull !0 +; CHECK-NEXT: store i32 1, i32* %[[p2]] +; CHECK-NEXT: icmp eq i32* %[[p_cmp]], null +define void @test2(i32** %ptr, i1 %c) { + br i1 %c, label %d1, label %d2 + +d1: + %p1 = load i32*, i32** %ptr + br label %d3 + +d2: + br label %d3 + +d3: + %pm = phi i32* [ null, %d2 ], [ %p1, %d1 ] + %p2 = load i32*, i32** %ptr, !nonnull !0 + store i32 1, i32* %p2 + %c2 = icmp eq i32* %pm, null + br i1 %c2, label %ret1, label %ret2 + +ret1: + tail call void @use(i32* %pm) nounwind + ret void + +ret2: + tail call void @use(i32* %pm) nounwind + ret void +} + +; Check that we do not propagate nonnull to loads predecessors that are combined +; to a PHI node. +; CHECK-LABEL: @test3( +; CHECK-LABEL: d1: +; CHECK-NEXT: %[[p1:.*]] = load i32*, i32** %ptr +; CHECK-NOT: !nonnull + +; CHECK-LABEL: d2: +; CHECK-NEXT: %[[p2:.*]] = load i32*, i32** %ptr +; CHECK-NOT: !nonnull + +; CHECK-LABEL: d3: +; CHECK-NEXT: phi i32* [ %[[p2]], %d2 ], [ %[[p1]], %d1 ] +define void @test3(i32** %ptr) { +d1: + %x = load i32*, i32** %ptr, !nonnull !0 + br label %d3 + +d2: + br label %d3 + +d3: + %y = load i32*, i32** %ptr + store i32 1, i32* %y + %c2 = icmp eq i32* %y, null + br i1 %c2, label %ret1, label %ret2 + +ret1: + ret void + +ret2: + ret void +} + + +!0 = !{}