Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -935,12 +935,18 @@ return false; Value *LoadedPtr = LI->getOperand(0); - - // If the loaded operand is defined in the LoadBB, it can't be available. - // TODO: Could do simple PHI translation, that would be fun :) - if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) - if (PtrOp->getParent() == LoadBB) + PHINode *TranslatableLoadedPhiPtr = dyn_cast<PHINode>(LoadedPtr); + + if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) { + // If the loaded operand is a phi not inside LoadBB, we can not + // phi translate it. + if (TranslatableLoadedPhiPtr && PtrOp->getParent() != LoadBB) + TranslatableLoadedPhiPtr = nullptr; + // If the loaded operand is defined in the LoadBB and it's not a phi, + // it can't be available. + if (!TranslatableLoadedPhiPtr && PtrOp->getParent() == LoadBB) return false; + } // Scan a few instructions up from the load, to see if it is obviously live at // the entry to its block. @@ -991,24 +997,35 @@ if (!PredsScanned.insert(PredBB).second) continue; - // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); unsigned NumScanedInst = 0; - Value *PredAvailable = - FindAvailableLoadedValue(LI, PredBB, BBIt, DefMaxInstsToScan, nullptr, - &IsLoadCSE, &NumScanedInst); - - // If PredBB has a single predecessor, continue scanning through the single - // precessor. + Value *PredAvailable = nullptr; + // NOTE: We don't CSE load that is volatile or anything stronger than + // unordered, that should have been checked when we entered the function. + assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads"); + // If this is a load on a phi pointer, phi-translate it and search + // for available load/store to the pointer in predecessors. + Value *Ptr = + TranslatableLoadedPhiPtr + ? 
TranslatableLoadedPhiPtr->getIncomingValueForBlock(PredBB) + : LI->getPointerOperand(); + + PredAvailable = FindAvailablePtrLoadStore( + Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan, + nullptr, &IsLoadCSE, &NumScanedInst); + + // If PredBB has a single predecessor, continue scanning through the + // single predecessor. BasicBlock *SinglePredBB = PredBB; while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() && NumScanedInst < DefMaxInstsToScan) { SinglePredBB = SinglePredBB->getSinglePredecessor(); if (SinglePredBB) { BBIt = SinglePredBB->end(); - PredAvailable = FindAvailableLoadedValue( - LI, SinglePredBB, BBIt, (DefMaxInstsToScan - NumScanedInst), - nullptr, &IsLoadCSE, &NumScanedInst); + PredAvailable = FindAvailablePtrLoadStore( + Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt, + (DefMaxInstsToScan - NumScanedInst), nullptr, &IsLoadCSE, + &NumScanedInst); } } @@ -1071,9 +1088,13 @@ if (UnavailablePred) { assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && "Can't handle critical edge here!"); + Value *Ptr = TranslatableLoadedPhiPtr + ? TranslatableLoadedPhiPtr->getIncomingValueForBlock( + UnavailablePred) + : LoadedPtr; LoadInst *NewVal = - new LoadInst(LoadedPtr, LI->getName() + ".pr", false, - LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(), + new LoadInst(Ptr, LI->getName() + ".pr", false, LI->getAlignment(), + LI->getOrdering(), LI->getSynchScope(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); if (AATags) Index: test/Transforms/JumpThreading/thread-loads.ll =================================================================== --- test/Transforms/JumpThreading/thread-loads.ll +++ test/Transforms/JumpThreading/thread-loads.ll @@ -381,6 +381,41 @@ declare void @fn3(i64) +; Make sure we phi-translate and make the partially redundant load in +; merge fully redundant and then we can jump-thread the block with the +; store. 
+; +; CHECK-LABEL: define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32* +; CHECK: merge.thread: +; CHECK: store +; CHECK: br label %left_x +; +; CHECK: left_x: +; CHECK-NEXT: ret i32 20 +define i32 @phi_translate_partial_redundant_loads(i32, i32*, i32*) { + %cmp0 = icmp ne i32 %0, 0 + br i1 %cmp0, label %left, label %right + +left: + store i32 1, i32* %1, align 4 + br label %merge + +right: + br label %merge + +merge: + %phiptr = phi i32* [ %1, %left ], [ %2, %right ] + %newload = load i32, i32* %phiptr, align 4 + %cmp1 = icmp slt i32 %newload, 5 + br i1 %cmp1, label %left_x, label %right_x + +left_x: + ret i32 20 + +right_x: + ret i32 10 +} + !0 = !{!3, !3, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"}