Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -152,6 +152,7 @@ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, ConstantPreference Preference, + bool &Changed, Instruction *CxtI = nullptr); bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB, ConstantPreference Preference, @@ -395,10 +396,9 @@ /// /// This returns true if there were any known values. /// -bool JumpThreading:: -ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, - ConstantPreference Preference, - Instruction *CxtI) { +bool JumpThreading::ComputeValueKnownInPredecessors( + Value *V, BasicBlock *BB, PredValueInfo &Result, + ConstantPreference Preference, bool &Changed, Instruction *CxtI) { // This method walks up use-def chains recursively. Because of this, we could // get into an infinite loop going around loops in the use-def chain. To // prevent this, keep track of what (value, block) pairs we've already visited @@ -410,6 +410,16 @@ // stack pops back out again. RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB)); + // Simplify the instruction before inferring the value. + Instruction *I = dyn_cast(V); + if (I && !isa(I)) + if (auto *NewV = SimplifyInstruction(I, BB->getModule()->getDataLayout())) { + I->replaceAllUsesWith(NewV); + I->eraseFromParent(); + V = NewV; + Changed = true; + } + // If V is a constant, then it is known in all predecessors. if (Constant *KC = getKnownConstant(V, Preference)) { for (BasicBlock *Pred : predecessors(BB)) @@ -420,7 +430,7 @@ // If V is a non-instruction value, or an instruction in a different block, // then it can't be derived from a PHI. - Instruction *I = dyn_cast(V); + I = dyn_cast(V); if (!I || I->getParent() != BB) { // Okay, if this is a live-in value, see if it has a known value at the end @@ -475,9 +485,9 @@ if (I->getOpcode() == Instruction::Or || I->getOpcode() == Instruction::And) { ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, - WantInteger, CxtI); + WantInteger, Changed, CxtI); ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, - WantInteger, CxtI); + WantInteger, Changed, CxtI); if (LHSVals.empty() && RHSVals.empty()) return false; @@ -512,8 +522,8 @@ if (I->getOpcode() == Instruction::Xor && isa(I->getOperand(1)) && cast(I->getOperand(1))->isOne()) { - ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, - WantInteger, CxtI); + ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result, WantInteger, + Changed, CxtI); if (Result.empty()) return false; @@ -531,7 +541,7 @@ if (ConstantInt *CI = dyn_cast(BO->getOperand(1))) { PredValueInfoTy LHSVals; ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals, - WantInteger, CxtI); + WantInteger, Changed, CxtI); // Try to use constant folding to simplify the binary operator. for (const auto &LHSVal : LHSVals) { @@ -608,7 +618,7 @@ if (Constant *CmpConst = dyn_cast(Cmp->getOperand(1))) { PredValueInfoTy LHSVals; ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, - WantInteger, CxtI); + WantInteger, Changed, CxtI); for (const auto &LHSVal : LHSVals) { Constant *V = LHSVal.first; @@ -631,7 +641,7 @@ PredValueInfoTy Conds; if ((TrueVal || FalseVal) && ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds, - WantInteger, CxtI)) { + WantInteger, Changed, CxtI)) { for (auto &C : Conds) { Constant *Cond = C.first; @@ -1180,8 +1190,10 @@ return false; PredValueInfoTy PredValues; - if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI)) - return false; + bool Changed = false; + if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, + Changed, CxtI)) + return Changed; assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1239,7 +1251,7 @@ // If all edges were unthreadable, we fail. if (PredToDestList.empty()) - return false; + return Changed; // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes @@ -1272,7 +1284,8 @@ getSuccessor(GetBestDestForJumpOnUndef(BB)); // Ok, try to thread it! - return ThreadEdge(BB, PredsToFactor, MostPopularDest); + Changed |= ThreadEdge(BB, PredsToFactor, MostPopularDest); + return Changed; } /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on @@ -1343,12 +1356,13 @@ PredValueInfoTy XorOpValues; bool isLHS = true; + bool Changed = false; if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues, - WantInteger, BO)) { + WantInteger, Changed, BO)) { assert(XorOpValues.empty()); if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues, - WantInteger, BO)) - return false; + WantInteger, Changed, BO)) + return Changed; isLHS = false; } @@ -1406,7 +1420,8 @@ } // Try to duplicate BB into PredBB. - return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); + Changed |= DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); + return Changed; } Index: test/Transforms/JumpThreading/basic.ll =================================================================== --- test/Transforms/JumpThreading/basic.ll +++ test/Transforms/JumpThreading/basic.ll @@ -476,6 +476,56 @@ ; CHECK: } } + +;;; Verify that we can handle constraint propagation through cast. +define i32 @test16(i1 %cond) { +Entry: +; CHECK-LABEL: @test16( + br i1 %cond, label %Merge, label %F1 + +; CHECK: Entry: +; CHECK-NEXT: br i1 %cond, label %F2, label %Merge + +F1: + %v1 = call i32 @f1() + br label %Merge + +Merge: + %B = phi i32 [1, %Entry], [%v1, %F1] + %M = icmp eq i32 %B, 0 + %M1 = zext i1 %M to i32 + %N = icmp eq i32 %M1, 1 + br i1 %N, label %T2, label %F2 + +; CHECK: Merge: +; CHECK-NOT: phi +; CHECK-NEXT: %v1 = call i32 @f1() + +T2: + %Q = zext i1 %M to i32 + ret i32 %Q + +F2: + ret i32 %B +; CHECK: F2: +; CHECK-NEXT: phi i32 +} + +;;; Just check that ComputeValueKnownInPredecessors() does not return true with +;;; no values and triggers the assert in ProcessThreadableEdges(). +define i32 @test17() { +entry: + %A = add i32 0, 1 + %B = icmp eq i32 %A, 0 + br i1 %B, label %T, label %F +T: + %v1 = call i32 @f1() + ret i32 %v1 +F: + %v2 = call i32 @f2() + ret i32 %v2 +} + ; In this test we check that block duplication is inhibited by the presence ; of a function with the 'noduplicate' attribute.