[JumpThreading] PHI-translate the loaded pointer in SimplifyPartiallyRedundantLoad.

When the pointer operand of the load is a PHI node in the load's block, the
pointer is translated into each predecessor before scanning that predecessor
for an available load/store, and again when inserting the compensating load
into the single unavailable predecessor. A regression test is added to
thread-loads.ll.

Index: lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- lib/Transforms/Scalar/JumpThreading.cpp
+++ lib/Transforms/Scalar/JumpThreading.cpp
@@ -914,6 +914,14 @@
   return false;
 }
 
+/// Return true if Op is an instruction defined in the given block.
+static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
+  if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+    if (OpInst->getParent() == BB)
+      return true;
+  return false;
+}
+
 /// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
 /// load instruction, eliminate it by replacing it with a PHI node. This is an
 /// important optimization that encourages jump threading, and needs to be run
@@ -936,11 +944,10 @@
 
   Value *LoadedPtr = LI->getOperand(0);
 
-  // If the loaded operand is defined in the LoadBB, it can't be available.
-  // TODO: Could do simple PHI translation, that would be fun :)
-  if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
-    if (PtrOp->getParent() == LoadBB)
-      return false;
+  // If the loaded operand is defined in the LoadBB and it's not a phi,
+  // it can't be available in predecessors.
+  if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
+    return false;
 
   // Scan a few instructions up from the load, to see if it is obviously live at
   // the entry to its block.
@@ -991,24 +998,31 @@
     if (!PredsScanned.insert(PredBB).second)
       continue;
 
-    // Scan the predecessor to see if the value is available in the pred.
     BBIt = PredBB->end();
     unsigned NumScanedInst = 0;
-    Value *PredAvailable =
-        FindAvailableLoadedValue(LI, PredBB, BBIt, DefMaxInstsToScan, nullptr,
-                                 &IsLoadCSE, &NumScanedInst);
-
-    // If PredBB has a single predecessor, continue scanning through the single
-    // precessor.
+    Value *PredAvailable = nullptr;
+    // NOTE: We don't CSE loads that are volatile or anything stronger than
+    // unordered; that should have been checked when we entered the function.
+    assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
+    // If this is a load on a phi pointer, phi-translate it and search
+    // for available load/store to the pointer in predecessors.
+    Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
+    PredAvailable = FindAvailablePtrLoadStore(
+        Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+        nullptr, &IsLoadCSE, &NumScanedInst);
+
+    // If PredBB has a single predecessor, continue scanning through the
+    // single predecessor.
     BasicBlock *SinglePredBB = PredBB;
     while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
            NumScanedInst < DefMaxInstsToScan) {
       SinglePredBB = SinglePredBB->getSinglePredecessor();
       if (SinglePredBB) {
         BBIt = SinglePredBB->end();
-        PredAvailable = FindAvailableLoadedValue(
-            LI, SinglePredBB, BBIt, (DefMaxInstsToScan - NumScanedInst),
-            nullptr, &IsLoadCSE, &NumScanedInst);
+        PredAvailable = FindAvailablePtrLoadStore(
+            Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
+            (DefMaxInstsToScan - NumScanedInst), nullptr, &IsLoadCSE,
+            &NumScanedInst);
       }
     }
 
@@ -1071,10 +1085,10 @@
   if (UnavailablePred) {
     assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
            "Can't handle critical edge here!");
-    LoadInst *NewVal =
-        new LoadInst(LoadedPtr, LI->getName() + ".pr", false,
-                     LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(),
-                     UnavailablePred->getTerminator());
+    LoadInst *NewVal = new LoadInst(
+        LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
+        LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
+        LI->getSynchScope(), UnavailablePred->getTerminator());
     NewVal->setDebugLoc(LI->getDebugLoc());
     if (AATags)
       NewVal->setAAMetadata(AATags);
Index: test/Transforms/JumpThreading/thread-loads.ll
===================================================================
--- test/Transforms/JumpThreading/thread-loads.ll
+++ test/Transforms/JumpThreading/thread-loads.ll
@@ -381,6 +381,41 @@
 
 declare void @fn3(i64)
 
+; Make sure we phi-translate and make the partially redundant load in
+; merge fully redundant and then we can jump-thread the block with the
+; store.
+;
+; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*
+; CHECK: merge.thread:
+; CHECK: store
+; CHECK: br label %left_x
+;
+; CHECK: left_x:
+; CHECK-NEXT: ret i32 20
+define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*) {
+  %cmp0 = icmp ne i32 %0, 0
+  br i1 %cmp0, label %left, label %right
+
+left:
+  store i32 1, i32* %1, align 4
+  br label %merge
+
+right:
+  br label %merge
+
+merge:
+  %phiptr = phi i32* [ %1, %left ], [ %2, %right ]
+  %newload = load i32, i32* %phiptr, align 4
+  %cmp1 = icmp slt i32 %newload, 5
+  br i1 %cmp1, label %left_x, label %right_x
+
+left_x:
+  ret i32 20
+
+right_x:
+  ret i32 10
+}
+
 !0 = !{!3, !3, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}