Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -14,12 +14,18 @@ #include "InstCombineInternal.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instcombine" +STATISTIC(NumPhiNodeUsersFullySimplified, + "Number of phi node users simplified into the phi inputs"); +STATISTIC(NumPhiNodeUsersHoisted, + "Number of phi node users hoisted into one of the inputs"); + /// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the /// adds all have a single use, turn this into a phi and a single binop. Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { @@ -875,6 +881,237 @@ return ReplaceInstUsesWith(FirstPhi, Undef); } +/// Take an instruction and a replacement for one of its operands and try to +/// simplify that to a constant or some existing instruction. +// FIXME: Generalize this and move it to InstructionSimplify.h. +static Value *simplifyInstWithReplacedOperand( + Instruction &I, Value &Operand, Value &ReplacementOperand, + const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr) { + const DataLayout &DL = I.getModule()->getDataLayout(); + unsigned Opcode = I.getOpcode(); + SmallVector Operands(I.value_op_begin(), I.value_op_end()); + std::replace(Operands.begin(), Operands.end(), &Operand, &ReplacementOperand); + switch (Opcode) { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return SimplifyFPBinOp(Opcode, Operands[0], Operands[1], + I.getFastMathFlags(), DL, TLI, DT, AC, CxtI); + + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return SimplifyBinOp(Opcode, Operands[0], Operands[1], DL, TLI, DT, AC, + CxtI); + + case Instruction::ICmp: + return SimplifyICmpInst(cast(I).getPredicate(), Operands[0], + Operands[1], DL, TLI, DT, AC, CxtI); + + case Instruction::FCmp: + return SimplifyFCmpInst(cast(I).getPredicate(), Operands[0], + Operands[1], I.getFastMathFlags(), DL, TLI, DT, AC, + CxtI); + + case Instruction::Select: + return SimplifySelectInst(Operands[0], Operands[1], Operands[2], DL, TLI, + DT, AC, CxtI); + + case Instruction::GetElementPtr: + return SimplifyGEPInst(Operands, DL, TLI, DT, AC, CxtI); + + case Instruction::InsertValue: + return SimplifyInsertValueInst(Operands[0], Operands[1], + cast(I).getIndices(), DL, + TLI, DT, AC, CxtI); + + case Instruction::ExtractValue: + return SimplifyExtractValueInst(Operands[0], + cast(I).getIndices(), DL, + TLI, DT, AC, CxtI); + + case Instruction::ExtractElement: + return SimplifyExtractElementInst(Operands[0], Operands[1], DL, TLI, DT, AC, + CxtI); + + default: { + SmallVector ConstantOperands; + ConstantOperands.reserve(Operands.size()); + for (auto *Op : Operands) { + if (auto *C = dyn_cast(Op)) + ConstantOperands.push_back(C); + else + return nullptr; // Can't constant fold. + } + return ConstantFoldInstOperands(Opcode, I.getType(), ConstantOperands, DL, + TLI); + } + } +} + +/// See if there is a single user of a phi node that simplifies with all but +/// one of the phi operands. If so, hoist the user and use a phi of the +/// simplified values. Returns true if it was able to simplify the phi. +static bool simplifyOrHoistPhiUsers(InstCombiner &IC, PHINode &PN) { + // Currently we only handle single-use phi nodes. This actually generalizes + // cleanly to multi-use phi nodes, but in that case we will grow the total + // number of phi nodes which might not be a clear win. + // TODO: We could extend this with a cost model to handle multi-use nodes as + // well. + if (!PN.hasOneUse()) + return false; + Instruction &User = *cast(PN.user_back()); + // Skip phi nodes which just use themselves. + if (&User == &PN) + return false; + // If the user isn't in the same basic block as the phi node, even though we + // *can* speculate it, we don't have a realistic cost model for whether this + // is necessary. This restricts the combine to hoisting operations out of + // straight line code. + // FIXME: This isn't terribly principled. We should have a better model for + // what is "canonical" here. + if (User.getParent() != PN.getParent()) + return false; + + auto *AC = IC.getAssumptionCache(); + auto *DT = IC.getDominatorTree(); + auto *TLI = IC.getTargetLibraryInfo(); + + // The goal of this is to speculate the user around the phi node. However, if + // there is a function call between the phi node and the user and that + // function call might never return, then doing this can change observable + // behavior unless the user is safe to speculate. So we model this as + // speculative execution. + if (!isSafeToSpeculativelyExecute(&User, /*Context*/nullptr, DT, TLI)) + return false; + + // We also don't handle any instruction which read or write memory so we + // don't have to scan for interfering instructions. + // TODO: We could potentially do a short scan to see if it happens to be safe + // even in the face of memory operations. + if (User.mayReadOrWriteMemory()) + return false; + + // Ok, this is a user that can be trivially speculated. Look to see if all + // but one of the phi's operands will trivially simplify away. We actually + // use this to build up the list of operands for the new phi node we'll + // create if everything goes well. + int NumOperands = PN.getNumIncomingValues(); + SmallVector NewOperands; + NewOperands.reserve(NumOperands); + int HoistOpIdx = -1; + for (int i = 0; i < NumOperands; ++i) { + Value &IV = *PN.getIncomingValue(i); + BasicBlock &IB = *PN.getIncomingBlock(i); + // See if we can just simplify the user with the operand. + if (Value *SimplifiedOp = simplifyInstWithReplacedOperand( + User, PN, IV, TLI, DT, AC, IB.getTerminator())) { + // If this didn't simplify to an instruction, dominance is trivially satisfied. + Instruction *SimplifiedOpI = dyn_cast(SimplifiedOp); + if (!SimplifiedOpI) { + NewOperands.push_back(SimplifiedOp); + continue; + } + + // If we simplified an operand back into the original phi node, we won't + // actually elimanet the use of this phi, so bail out. + if (SimplifiedOpI == &PN) + return false; + + // Otherwise we have to see if the simplified instruction is available in + // the predecessor block. + if (DT->dominates(SimplifiedOpI->getParent(), &IB)) { + NewOperands.push_back(SimplifiedOpI); + continue; + } + } + + // If we cannot simplify the user with the operand, and this is the first + // such operand, we will try to speculate the user into the incoming block. + // Record this and put an undef into the new phi operands temporarily. + if (HoistOpIdx != -1) + // Already needed to hoist one operand, give up now that we have a second. + return false; + // Make sure user doesn't come from the same basic block as the incoming + // value. If so, they may be related to each other and we could flip/flop + // between two values in that basic block endlessly. The whole goal of + // hoisting the user is to get it into the basic block with the incoming + // value to enable further combining. + if (User.getParent() == &IB) + return false; + // Also make sure that the incoming value isn't the + // terminator of the predecessor -- if it is we won't have a place to hoist + // the user. + if (&IV == IB.getTerminator()) + return false; + // Make sure that the incoming block has a single successor (this + // block). Otherwise, we might be speculating code into a loop body. + // FIXME: We can do much better than this when LoopInfo is present. + BasicBlock *Succ = IB.getSingleSuccessor(); + if (!Succ) + return false; + assert(Succ == PN.getParent() && "PN's basic block isn't a successor?!"); + // Check that all of the other operands to the user are available in the + // incoming block. + for (Value *UserOp : User.operands()) + if (Instruction *UserOpI = dyn_cast(UserOp)) + if (!DT->dominates(UserOpI->getParent(), &IB)) + return false; + HoistOpIdx = i; + NewOperands.push_back(UndefValue::get(User.getType())); + } + assert(NumOperands == (int)NewOperands.size() && + "Unexpected number of operands!"); + + // Create the new phi node and RAUW from the old user to the new phi. + IC.Builder->SetInsertPoint(&PN); + auto *NewPN = IC.Builder->CreatePHI(User.getType(), NumOperands); + for (int i = 0; i < NumOperands; ++i) + NewPN->addIncoming(NewOperands[i], PN.getIncomingBlock(i)); + IC.ReplaceInstUsesWith(User, NewPN); + + // Check if we need to hoist the user into one of the incoming blocks. This + // may not be necessary if, for instance, every incoming value folded away. + if (HoistOpIdx != -1) { + ++NumPhiNodeUsersHoisted; + + // Replace every use of the phi node with this index's incoming value. + for (Use &U : User.operands()) + if (U.get() == &PN) + U = PN.getIncomingValue(HoistOpIdx); + assert(PN.use_empty() && "Stale uses of the phi node left!"); + + // Now splice the user into the incoming block just befor ethe terminator. + User.moveBefore(PN.getIncomingBlock(HoistOpIdx)->getTerminator()); + + // And wire it up to the new phi node's incoming value. + NewPN->setIncomingValue(HoistOpIdx, &User); + } else { + ++NumPhiNodeUsersFullySimplified; + + // Everything folded away, erase the user completely. + IC.EraseInstFromFunction(User); + } + + // Now erase the PN node which is dead, and return that we have successfully + // simplified it. + IC.EraseInstFromFunction(PN); + return true; +} + // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { @@ -980,6 +1217,9 @@ } } + if (simplifyOrHoistPhiUsers(*this, PN)) + return nullptr; + // If this is an integer PHI and we know that it has an illegal type, see if // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is Index: test/Transforms/InstCombine/select.ll =================================================================== --- test/Transforms/InstCombine/select.ll +++ test/Transforms/InstCombine/select.ll @@ -412,8 +412,8 @@ %b = select i1 %a, i32 10, i32 20 ret i32 %b ; CHECK-LABEL: @test25( -; CHECK: %a = phi i32 [ 10, %jump ], [ 20, %entry ] -; CHECK-NEXT: ret i32 %a +; CHECK: %[[a:.*]] = phi i32 [ 10, %jump ], [ 20, %entry ] +; CHECK-NEXT: ret i32 %[[a]] } define i32 @test26(i1 %cond) { @@ -427,8 +427,8 @@ %b = select i1 %a, i32 20, i32 10 ret i32 %b ; CHECK-LABEL: @test26( -; CHECK: %a = phi i32 [ 20, %entry ], [ 10, %jump ] -; CHECK-NEXT: ret i32 %a +; CHECK: %[[a:.*]] = phi i32 [ 20, %entry ], [ 10, %jump ] +; CHECK-NEXT: ret i32 %[[a]] } define i32 @test27(i1 %c, i32 %A, i32 %B) { @@ -441,8 +441,8 @@ %b = select i1 %a, i32 %A, i32 %B ret i32 %b ; CHECK-LABEL: @test27( -; CHECK: %a = phi i32 [ %A, %jump ], [ %B, %entry ] -; CHECK-NEXT: ret i32 %a +; CHECK: %[[a:.*]] = phi i32 [ %A, %jump ], [ %B, %entry ] +; CHECK-NEXT: ret i32 %[[a]] } define i32 @test28(i1 %cond, i32 %A, i32 %B) { Index: test/Transforms/InstCombine/shift-sra.ll =================================================================== --- test/Transforms/InstCombine/shift-sra.ll +++ test/Transforms/InstCombine/shift-sra.ll @@ -34,8 +34,8 @@ ret i64 %S ; CHECK-LABEL: @test3( -; CHECK: %P = phi i64 -; CHECK-NEXT: ret i64 %P +; CHECK: %[[P:.*]] = phi i64 +; CHECK-NEXT: ret i64 %[[P]] } define i64 @test4(i1 %X, i64 %Y, i1 %Cond) {