Index: llvm/trunk/include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/JumpThreading.h +++ llvm/trunk/include/llvm/Transforms/Scalar/JumpThreading.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -60,6 +61,7 @@ class JumpThreadingPass : public PassInfoMixin { TargetLibraryInfo *TLI; LazyValueInfo *LVI; + AliasAnalysis *AA; std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = false; @@ -90,7 +92,8 @@ // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, - bool HasProfileData_, std::unique_ptr BFI_, + AliasAnalysis *AA_, bool HasProfileData_, + std::unique_ptr BFI_, std::unique_ptr BPI_); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); Index: llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" @@ -91,6 +92,7 @@ bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -106,6 +108,7 @@ "Jump Threading", false, false) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 
INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) @@ -123,6 +126,7 @@ return false; auto TLI = &getAnalysis().getTLI(); auto LVI = &getAnalysis().getLVI(); + auto AA = &getAnalysis().getAAResults(); std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = F.getEntryCount().hasValue(); @@ -131,7 +135,8 @@ BPI.reset(new BranchProbabilityInfo(F, LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - return Impl.runImpl(F, TLI, LVI, HasProfileData, std::move(BFI), + + return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), std::move(BPI)); } @@ -140,6 +145,8 @@ auto &TLI = AM.getResult(F); auto &LVI = AM.getResult(F); + auto &AA = AM.getResult(F); + std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = F.getEntryCount().hasValue(); @@ -148,8 +155,9 @@ BPI.reset(new BranchProbabilityInfo(F, LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = - runImpl(F, &TLI, &LVI, HasProfileData, std::move(BFI), std::move(BPI)); + + bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI), + std::move(BPI)); if (!Changed) return PreservedAnalyses::all(); @@ -159,13 +167,15 @@ } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, - LazyValueInfo *LVI_, bool HasProfileData_, + LazyValueInfo *LVI_, AliasAnalysis *AA_, + bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; + AA = AA_; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -953,8 +963,8 @@ // the entry to its block. 
BasicBlock::iterator BBIt(LI); bool IsLoadCSE; - if (Value *AvailableVal = - FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) { + if (Value *AvailableVal = FindAvailableLoadedValue( + LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) { // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. @@ -1001,9 +1011,8 @@ // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); unsigned NumScanedInst = 0; - Value *PredAvailable = - FindAvailableLoadedValue(LI, PredBB, BBIt, DefMaxInstsToScan, nullptr, - &IsLoadCSE, &NumScanedInst); + Value *PredAvailable = FindAvailableLoadedValue( + LI, PredBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE, &NumScanedInst); // If PredBB has a single predecessor, continue scanning through the single // predecessor. @@ -1014,8 +1023,8 @@ if (SinglePredBB) { BBIt = SinglePredBB->end(); PredAvailable = FindAvailableLoadedValue( - LI, SinglePredBB, BBIt, (DefMaxInstsToScan - NumScanedInst), - nullptr, &IsLoadCSE, &NumScanedInst); + LI, SinglePredBB, BBIt, (DefMaxInstsToScan - NumScanedInst), AA, + &IsLoadCSE, &NumScanedInst); } } Index: llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll =================================================================== --- llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll +++ llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -jump-threading -S | FileCheck %s -; RUN: opt < %s -passes=jump-threading -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=jump-threading -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin7" @@ -302,6 +302,115 @@ ret void } +define i32 @fn_noalias(i1 %c2,i64* noalias %P, i64* noalias %P2) { +; CHECK-LABEL: @fn_noalias +; CHECK-LABEL: cond1: +; 
CHECK: %[[LD1:.*]] = load i64, i64* %P +; CHECK: br i1 %c, label %[[THREAD:.*]], label %end +; CHECK-LABEL: cond2: +; CHECK: %[[LD2:.*]] = load i64, i64* %P +; CHECK-LABEL: cond3: +; CHECK: %[[PHI:.*]] = phi i64 [ %[[LD1]], %[[THREAD]] ], [ %[[LD2]], %cond2 ] +; CHECK: call void @fn3(i64 %[[PHI]]) +entry: + br i1 %c2, label %cond2, label %cond1 + +cond1: + %l1 = load i64, i64* %P + store i64 42, i64* %P2 + %c = icmp eq i64 %l1, 0 + br i1 %c, label %cond2, label %end + +cond2: + %l2 = load i64, i64* %P + call void @fn2(i64 %l2) + %c3 = icmp eq i64 %l2, 0 + br i1 %c3, label %cond3, label %end + +cond3: + call void @fn3(i64 %l2) + br label %end + +end: + ret i32 0 +} + +; This tests if we can thread from %sw.bb.i to %do.body.preheader.i67 through +; %sw.bb21.i. To make this happen, %l2 should be detected as a partially +; redundant load with %l3 across the store to %phase in %sw.bb21.i. + +%struct.NEXT_MOVE = type { i32, i32, i32* } +@hash_move = unnamed_addr global [65 x i32] zeroinitializer, align 4 +@current_move = internal global [65 x i32] zeroinitializer, align 4 +@last = internal unnamed_addr global [65 x i32*] zeroinitializer, align 8 +@next_status = internal unnamed_addr global [65 x %struct.NEXT_MOVE] zeroinitializer, align 8 +define fastcc i32 @Search(i64 %idxprom.i, i64 %idxprom.i89, i32 %c) { +; CHECK-LABEL: @Search +; CHECK-LABEL: sw.bb.i: +; CHECK: %[[LD1:.*]] = load i32, i32* %arrayidx185, align 4 +; CHECK: %[[C1:.*]] = icmp eq i32 %[[LD1]], 0 +; CHECK: br i1 %[[C1]], label %sw.bb21.i.thread, label %if.then.i64 +; CHECK-LABEL: sw.bb21.i.thread: +; CHECK: br label %[[THREAD_TO:.*]] +; CHECK-LABEL: sw.bb21.i: +; CHECK: %[[LD2:.*]] = load i32, i32* %arrayidx185, align 4 +; CHECK: %[[C2:.*]] = icmp eq i32 %[[LD2]], 0 +; CHECK:br i1 %[[C2]], label %[[THREAD_TO]], label %cleanup +entry: + %arrayidx185 = getelementptr inbounds [65 x i32], [65 x i32]* @hash_move, i64 0, i64 %idxprom.i + %arrayidx307 = getelementptr inbounds [65 x i32], [65 x i32]* 
@current_move, i64 0, i64 %idxprom.i + %arrayidx89 = getelementptr inbounds [65 x i32*], [65 x i32*]* @last, i64 0, i64 %idxprom.i + %phase = getelementptr inbounds [65 x %struct.NEXT_MOVE], [65 x %struct.NEXT_MOVE]* @next_status, i64 0, i64 %idxprom.i, i32 0 + br label %cond.true282 + +cond.true282: + switch i32 %c, label %sw.default.i [ + i32 1, label %sw.bb.i + i32 0, label %sw.bb21.i + ] + +sw.default.i: + br label %cleanup + +sw.bb.i: + %call.i62 = call fastcc i32* @GenerateCheckEvasions() + store i32* %call.i62, i32** %arrayidx89, align 8 + %l2 = load i32, i32* %arrayidx185, align 4 + %tobool.i63 = icmp eq i32 %l2, 0 + br i1 %tobool.i63, label %sw.bb21.i, label %if.then.i64 + +if.then.i64: ; preds = %sw.bb.i + store i32 7, i32* %phase, align 8 + store i32 %l2, i32* %arrayidx307, align 4 + %call16.i = call fastcc i32 @ValidMove(i32 %l2) + %tobool17.i = icmp eq i32 %call16.i, 0 + br i1 %tobool17.i, label %if.else.i65, label %cleanup + +if.else.i65: + call void @f65() + br label %sw.bb21.i + +sw.bb21.i: + store i32 10, i32* %phase, align 8 + %l3= load i32, i32* %arrayidx185, align 4 + %tobool27.i = icmp eq i32 %l3, 0 + br i1 %tobool27.i, label %do.body.preheader.i67, label %cleanup + +do.body.preheader.i67: + call void @f67() + ret i32 67 + +cleanup: + call void @Cleanup() + ret i32 0 +} + +declare fastcc i32* @GenerateCheckEvasions() +declare fastcc i32 @ValidMove(i32 %move) +declare void @f67() +declare void @Cleanup() +declare void @f65() + define i32 @fn_SinglePred(i1 %c2,i64* %P) { ; CHECK-LABEL: @fn_SinglePred ; CHECK-LABEL: entry: