Index: llvm/trunk/include/llvm/Analysis/Loads.h =================================================================== --- llvm/trunk/include/llvm/Analysis/Loads.h +++ llvm/trunk/include/llvm/Analysis/Loads.h @@ -71,7 +71,7 @@ /// the only relevant load gets deleted.) /// /// \param Load The load we want to replace. -/// \param ScanBB The basic block to scan. FIXME: This is redundant. +/// \param ScanBB The basic block to scan. /// \param [in,out] ScanFrom The location to start scanning from. When this /// function returns, it points at the last instruction scanned. /// \param MaxInstsToScan The maximum number of instructions to scan. If this @@ -89,7 +89,6 @@ BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = DefMaxInstsToScan, AliasAnalysis *AA = nullptr, - AAMDNodes *AATags = nullptr, bool *IsLoadCSE = nullptr); } Index: llvm/trunk/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/Local.h +++ llvm/trunk/include/llvm/Transforms/Utils/Local.h @@ -316,6 +316,12 @@ /// Metadata not listed as known via KnownIDs is removed void combineMetadata(Instruction *K, const Instruction *J, ArrayRef KnownIDs); +/// Combine the metadata of two instructions so that K can replace J. This +/// specifically handles the case of CSE-like transformations. +/// +/// Unknown metadata is removed. +void combineMetadataForCSE(Instruction *K, const Instruction *J); + /// Replace each use of 'From' with 'To' if that use is dominated by /// the given edge. Returns the number of replacements made. unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, Index: llvm/trunk/lib/Analysis/Loads.cpp =================================================================== --- llvm/trunk/lib/Analysis/Loads.cpp +++ llvm/trunk/lib/Analysis/Loads.cpp @@ -302,11 +302,11 @@ "to scan backward from a given instruction, when searching for " "available loaded value")); -Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, +Value *llvm::FindAvailableLoadedValue(LoadInst *Load, + BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA, AAMDNodes *AATags, - bool *IsLoadCSE) { + AliasAnalysis *AA, bool *IsLoadCSE) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; @@ -356,8 +356,6 @@ if (LI->isAtomic() < Load->isAtomic()) return nullptr; - if (AATags) - LI->getAAMetadata(*AATags); if (IsLoadCSE) *IsLoadCSE = true; return LI; @@ -377,8 +375,8 @@ if (SI->isAtomic() < Load->isAtomic()) return nullptr; - if (AATags) - SI->getAAMetadata(*AATags); + if (IsLoadCSE) + *IsLoadCSE = false; return SI->getOperand(0); } Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -819,11 +819,10 @@ // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. BasicBlock::iterator BBI(LI); - AAMDNodes AATags; bool IsLoadCSE = false; if (Value *AvailableVal = FindAvailableLoadedValue(&LI, LI.getParent(), BBI, - DefMaxInstsToScan, AA, &AATags, &IsLoadCSE)) { + DefMaxInstsToScan, AA, &IsLoadCSE)) { if (IsLoadCSE) { LoadInst *NLI = cast(AvailableVal); unsigned KnownIDs[] = { Index: llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp @@ -951,12 +951,17 @@ // Scan a few instructions up from the load, to see if it is obviously live at // the entry to its block. BasicBlock::iterator BBIt(LI); - + bool IsLoadCSE; if (Value *AvailableVal = - FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan)) { + FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) { // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. + if (IsLoadCSE) { + LoadInst *NLI = cast(AvailableVal); + combineMetadataForCSE(NLI, LI); + }; + // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); @@ -983,6 +988,7 @@ typedef SmallVector, 8> AvailablePredsTy; AvailablePredsTy AvailablePreds; BasicBlock *OneUnavailablePred = nullptr; + SmallVector CSELoads; // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. @@ -993,17 +999,17 @@ // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); - AAMDNodes ThisAATags; Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt, DefMaxInstsToScan, - nullptr, &ThisAATags); + nullptr, + &IsLoadCSE); if (!PredAvailable) { OneUnavailablePred = PredBB; continue; } - // If AA tags disagree or are not present, forget about them. - if (AATags != ThisAATags) AATags = AAMDNodes(); + if (IsLoadCSE) + CSELoads.push_back(cast(PredAvailable)); // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. @@ -1101,6 +1107,10 @@ PN->addIncoming(PredV, I->first); } + for (LoadInst *PredLI : CSELoads) { + combineMetadataForCSE(PredLI, LI); + } + LI->replaceAllUsesWith(PN); LI->eraseFromParent(); Index: llvm/trunk/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/Local.cpp +++ llvm/trunk/lib/Transforms/Utils/Local.cpp @@ -1646,6 +1646,17 @@ K->setMetadata(LLVMContext::MD_invariant_group, JMD); } +void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null}; + combineMetadata(K, J, KnownIDs); +} + unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root) { Index: llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll =================================================================== --- llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll +++ llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll @@ -246,7 +246,70 @@ ret i32 %res.0 } +; Make sure we merge the aliasing metadata. (If we don't, we have a load +; with the wrong metadata, so the branch gets incorrectly eliminated.) +define void @test8(i32*, i32*, i32*) { +; CHECK-LABEL: @test8( +; CHECK: %a = load i32, i32* %0, !range !4 +; CHECK-NEXT: store i32 %a +; CHECK: br i1 %c + %a = load i32, i32* %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10 + %b = load i32, i32* %0, !range !5 + store i32 %a, i32* %1 + %c = icmp eq i32 %b, 8 + br i1 %c, label %ret1, label %ret2 + +ret1: + ret void + +ret2: + %xxx = tail call i32 (...) @f1() nounwind + ret void +} + +; Make sure we merge/PRE aliasing metadata correctly. That means that +; we need to remove metadata from the existing load, and add appropriate +; metadata to the newly inserted load. +define void @test9(i32*, i32*, i32*, i1 %c) { +; CHECK-LABEL: @test9( + br i1 %c, label %d1, label %d2 + +; CHECK: d1: +; CHECK-NEXT: %a = load i32, i32* %0{{$}} +d1: + %a = load i32, i32* %0, !range !4, !alias.scope !9, !noalias !10 + br label %d3 + +; CHECK: d2: +; CHECK-NEXT: %xxxx = tail call i32 (...) @f1() +; CHECK-NEXT: %b.pr = load i32, i32* %0, !tbaa !0{{$}} +d2: + %xxxx = tail call i32 (...) @f1() nounwind + br label %d3 + +d3: + %p = phi i32 [ 1, %d2 ], [ %a, %d1 ] + %b = load i32, i32* %0, !tbaa !0 + store i32 %p, i32* %1 + %c2 = icmp eq i32 %b, 8 + br i1 %c2, label %ret1, label %ret2 + +ret1: + ret void + +ret2: + %xxx = tail call i32 (...) @f1() nounwind + ret void +} + !0 = !{!3, !3, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA", null} !3 = !{!"int", !1} +!4 = !{ i32 0, i32 1 } +!5 = !{ i32 8, i32 10 } +!6 = !{!6} +!7 = !{!7, !6} +!8 = !{!8, !6} +!9 = !{!7} +!10 = !{!8}