Remove the AATags out-parameter from FindAvailableLoadedValue.

FindAvailableLoadedValue now resets *IsLoadCSE to false on entry and sets it
to true only when the value it returns is an earlier load.  JumpThreading uses
that flag to call combineMetadata() on the earlier load (both for the
same-block case and for loads found in predecessors) instead of comparing
AAMDNodes collected through the removed AATags parameter.

Index: include/llvm/Analysis/Loads.h
===================================================================
--- include/llvm/Analysis/Loads.h
+++ include/llvm/Analysis/Loads.h
@@ -82,7 +82,6 @@
                                 BasicBlock::iterator &ScanFrom,
                                 unsigned MaxInstsToScan = DefMaxInstsToScan,
                                 AliasAnalysis *AA = nullptr,
-                                AAMDNodes *AATags = nullptr,
                                 bool *IsLoadCSE = nullptr);
 
 }
Index: lib/Analysis/Loads.cpp
===================================================================
--- lib/Analysis/Loads.cpp
+++ lib/Analysis/Loads.cpp
@@ -322,11 +322,13 @@
 Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA, AAMDNodes *AATags,
-                                      bool *IsLoadCSE) {
+                                      AliasAnalysis *AA, bool *IsLoadCSE) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
+  if (IsLoadCSE)
+    *IsLoadCSE = false;
+
   Value *Ptr = Load->getPointerOperand();
   Type *AccessTy = Load->getType();
 
@@ -373,8 +375,6 @@
         if (LI->isAtomic() < Load->isAtomic())
           return nullptr;
 
-        if (AATags)
-          LI->getAAMetadata(*AATags);
         if (IsLoadCSE)
           *IsLoadCSE = true;
         return LI;
@@ -394,8 +394,6 @@
         if (SI->isAtomic() < Load->isAtomic())
           return nullptr;
 
-        if (AATags)
-          SI->getAAMetadata(*AATags);
         return SI->getOperand(0);
       }
 
Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -818,11 +818,10 @@
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
   BasicBlock::iterator BBI(LI);
-  AAMDNodes AATags;
   bool IsLoadCSE = false;
   if (Value *AvailableVal =
       FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
-                               DefMaxInstsToScan, AA, &AATags, &IsLoadCSE)) {
+                               DefMaxInstsToScan, AA, &IsLoadCSE)) {
     if (IsLoadCSE) {
       LoadInst *NLI = cast<LoadInst>(AvailableVal);
       unsigned KnownIDs[] = {
Index: lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- lib/Transforms/Scalar/JumpThreading.cpp
+++ lib/Transforms/Scalar/JumpThreading.cpp
@@ -946,13 +946,25 @@
   // Scan a few instructions up from the load, to see if it is obviously live at
   // the entry to its block.
   BasicBlock::iterator BBIt(LI);
-
+  bool IsLoadCSE = false; // Set when the available value is an earlier load.
   if (Value *AvailableVal =
-        FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan)) {
+        FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) {
     // If the value of the load is locally available within the block, just use
     // it. This frequently occurs for reg2mem'd allocas.
     //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
 
+    if (IsLoadCSE) {
+      LoadInst *NLI = cast<LoadInst>(AvailableVal);
+      unsigned KnownIDs[] = {
+          LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+          LLVMContext::MD_noalias, LLVMContext::MD_range,
+          LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+          LLVMContext::MD_dereferenceable,
+          LLVMContext::MD_dereferenceable_or_null};
+      combineMetadata(NLI, LI, KnownIDs);
+    }
+
     // If the returned value is the load itself, replace with an undef. This can
     // only happen in dead loops.
     if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
@@ -979,6 +991,7 @@
   typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
   AvailablePredsTy AvailablePreds;
   BasicBlock *OneUnavailablePred = nullptr;
+  SmallVector<LoadInst*, 8> CSELoads;
 
   // If we got here, the loaded value is transparent through to the start of the
   // block. Check to see if it is available in any of the predecessor blocks.
@@ -989,17 +1002,17 @@
 
     // Scan the predecessor to see if the value is available in the pred.
     BBIt = PredBB->end();
-    AAMDNodes ThisAATags;
     Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt,
                                                     DefMaxInstsToScan,
-                                                    nullptr, &ThisAATags);
+                                                    nullptr,
+                                                    &IsLoadCSE);
     if (!PredAvailable) {
       OneUnavailablePred = PredBB;
       continue;
     }
 
-    // If AA tags disagree or are not present, forget about them.
-    if (AATags != ThisAATags) AATags = AAMDNodes();
+    if (IsLoadCSE)
+      CSELoads.push_back(cast<LoadInst>(PredAvailable));
 
     // If so, this load is partially redundant. Remember this info so that we
     // can create a PHI node.
@@ -1096,6 +1109,17 @@
     PN->addIncoming(PredV, I->first);
   }
 
+  for (LoadInst *PredLI : CSELoads) {
+    unsigned KnownIDs[] = {
+        LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+        LLVMContext::MD_noalias, LLVMContext::MD_range,
+        LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+        LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+        LLVMContext::MD_dereferenceable,
+        LLVMContext::MD_dereferenceable_or_null};
+    combineMetadata(PredLI, LI, KnownIDs);
+  }
+
   //cerr << "PRE: " << *LI << *PN << "\n";
 
   LI->replaceAllUsesWith(PN);
Index: test/Transforms/JumpThreading/thread-loads.ll
===================================================================
--- test/Transforms/JumpThreading/thread-loads.ll
+++ test/Transforms/JumpThreading/thread-loads.ll
@@ -107,7 +107,70 @@
   ret i32 13
 }
 
+; Make sure we merge the aliasing metadata. (If we don't, we have a load
+; with the wrong metadata, so the branch gets incorrectly eliminated.)
+define void @test4(i32*, i32*, i32*) {
+; CHECK-LABEL: @test4(
+; CHECK: %a = load i32, i32* %0, !range !4
+; CHECK-NEXT: store i32 %a
+; CHECK: br i1 %c
+  %a = load i32, i32* %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10
+  %b = load i32, i32* %0, !range !5
+  store i32 %a, i32* %1
+  %c = icmp eq i32 %b, 8
+  br i1 %c, label %ret1, label %ret2
+
+ret1:
+  ret void
+
+ret2:
+  %xxx = tail call i32 (...) @f1() nounwind
+  ret void
+}
+
+; Make sure we merge/PRE aliasing metadata correctly. That means that
+; we need to remove metadata from the existing load, and add appropriate
+; metadata to the newly inserted load.
+define void @test5(i32*, i32*, i32*, i1 %c) {
+; CHECK-LABEL: @test5(
+  br i1 %c, label %d1, label %d2
+
+; CHECK: d1:
+; CHECK-NEXT: %a = load i32, i32* %0{{$}}
+d1:
+  %a = load i32, i32* %0, !range !4, !alias.scope !9, !noalias !10
+  br label %d3
+
+; CHECK: d2:
+; CHECK-NEXT: %xxxx = tail call i32 (...) @f1()
+; CHECK-NEXT: %b.pr = load i32, i32* %0, !tbaa !0{{$}}
+d2:
+  %xxxx = tail call i32 (...) @f1() nounwind
+  br label %d3
+
+d3:
+  %p = phi i32 [ 1, %d2 ], [ %a, %d1 ]
+  %b = load i32, i32* %0, !tbaa !0
+  store i32 %p, i32* %1
+  %c2 = icmp eq i32 %b, 8
+  br i1 %c2, label %ret1, label %ret2
+
+ret1:
+  ret void
+
+ret2:
+  %xxx = tail call i32 (...) @f1() nounwind
+  ret void
+}
+
 !0 = !{!3, !3, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA", null}
 !3 = !{!"int", !1}
+!4 = !{ i32 0, i32 1 }
+!5 = !{ i32 8, i32 10 }
+!6 = !{!6}
+!7 = !{!7, !6}
+!8 = !{!8, !6}
+!9 = !{!7}
+!10 = !{!8}