Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -24,6 +24,52 @@ #define DEBUG_TYPE "instcombine" +/// Verify that if we fold the PHI arguments into a single operation with +/// the PHI node as input, the original operations become unused. This +/// is the case if the only user(s) of the original operations are either +/// this PHI node, or another PHI node with the same set of arguments +/// (because the operation will be performed on all such nodes if it is +/// performed on any such node). In the case of multiple PHI nodes, we +/// only want to perform the transformation if the number of PHI nodes +/// (i.e. the number of new operations that will be introduced) is not +/// larger than the number of original operations. +static bool IsFoldPHIArgProfitable(PHINode &PN) { + SmallPtrSet PHIs; + SmallPtrSet Values; + + PHIs.insert(&PN); + for (Value *IncValue : PN.incoming_values()) + Values.insert(IncValue); + + for (Value *IncValue : Values) + if (!isa(IncValue)) + for (auto &U : IncValue->uses()) { + // All uses must be PHI nodes. + auto *P = dyn_cast(U.getUser()); + if (!P) + return false; + // If we already know this PHI node, we're good. Otherwise, + // add it to our set. If the set is now so large that the + // operation would become unprofitable, bail. + if (!PHIs.insert(P).second) + continue; + if (PHIs.size() > Values.size()) + return false; + // Verify that the new PHI node has exactly the same set of + // argument values as the original one. + SmallPtrSet OtherValues; + for (Value *OtherValue : P->incoming_values()) { + OtherValues.insert(OtherValue); + if (!Values.count(OtherValue)) + return false; + } + if (OtherValues.size() != Values.size()) + return false; + } + + return true; +} + /// The PHI arguments will be folded into a single operation with a PHI node /// as input. The debug location of the single operation will be the merged /// locations of the original PHI node arguments. @@ -51,10 +97,10 @@ Type *LHSType = LHSVal->getType(); Type *RHSType = RHSVal->getType(); - // Scan to see if all operands are the same opcode, and all have one use. + // Scan to see if all operands are the same opcode. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (!I || I->getOpcode() != Opc || !I->hasOneUse() || + if (!I || I->getOpcode() != Opc || // Verify type of the LHS matches so we don't fold cmp's of different // types. I->getOperand(0)->getType() != LHSType || @@ -78,6 +124,10 @@ if (!LHSVal && !RHSVal) return nullptr; + // Verify that folding is profitable (the original operands are dead). + if (!IsFoldPHIArgProfitable(PN)) + return nullptr; + // Otherwise, this is safe to transform! Value *InLHS = FirstInst->getOperand(0); @@ -150,10 +200,10 @@ bool AllInBounds = true; - // Scan to see if all operands are the same opcode, and all have one use. + // Scan to see if all operands are the same opcode. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast(PN.getIncomingValue(i)); - if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || + if (!GEP || GEP->getType() != FirstInst->getType() || GEP->getNumOperands() != FirstInst->getNumOperands()) return nullptr; @@ -203,6 +253,10 @@ if (AllBasePointersAreAllocas) return nullptr; + // Verify that folding is profitable (the original operands are dead). + if (!IsFoldPHIArgProfitable(PN)) + return nullptr; + // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. SmallVector OperandPhis(FixedOperands.size()); @@ -322,7 +376,7 @@ // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUse()) + if (!LI) return nullptr; // We can't sink the load if the loaded value could be modified between @@ -348,6 +402,10 @@ return nullptr; } + // Verify that folding is profitable (the original operands are dead). + if (!IsFoldPHIArgProfitable(PN)) + return nullptr; + // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), @@ -438,8 +496,8 @@ unsigned NumConsts = 0; for (Value *V : Phi.incoming_values()) { if (auto *Zext = dyn_cast(V)) { - // All zexts must be identical and have one use. - if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse()) + // All zexts must be identical. + if (Zext->getSrcTy() != NarrowType) return nullptr; NewIncoming.push_back(Zext->getOperand(0)); NumZexts++; @@ -465,6 +523,10 @@ if (NumConsts == 0 || NumZexts < 2) return nullptr; + // Verify that folding is profitable (the original operands are dead). + if (!IsFoldPHIArgProfitable(Phi)) + return nullptr; + // All incoming values are zexts or constants that are safe to truncate. // Create a new phi node of the narrow type, phi together all of the new // operands, and zext the result back to the original type. @@ -523,7 +585,7 @@ // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) + if (!I || !I->isSameOperationAs(FirstInst)) return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) @@ -533,6 +595,10 @@ } } + // Verify that folding is profitable (the original operands are dead). + if (!IsFoldPHIArgProfitable(PN)) + return nullptr; + // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), @@ -891,10 +957,7 @@ if (isa(PN.getIncomingValue(0)) && isa(PN.getIncomingValue(1)) && cast(PN.getIncomingValue(0))->getOpcode() == - cast(PN.getIncomingValue(1))->getOpcode() && - // FIXME: The hasOneUse check will fail for PHIs that use the value more - // than themselves more than once. - PN.getIncomingValue(0)->hasOneUse()) + cast(PN.getIncomingValue(1))->getOpcode()) if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) return Result; Index: test/Transforms/InstCombine/phi.ll =================================================================== --- test/Transforms/InstCombine/phi.ll +++ test/Transforms/InstCombine/phi.ll @@ -879,3 +879,56 @@ %cmp1 = icmp ne i32 %a.0, 0 ret i1 %cmp1 } + + +; Verify that we fold arguments through multiple identical PHI nodes + +; CHECK-LABEL: @phi_multi +; CHECK: loop_start: +; CHECK: %[[RES2:.*]] = phi i8 [ %valB, %entry ], [ %valB.next, %loop_next ] +; CHECK: exit1: +; CHECK-NEXT: %[[RES1:.*]] = phi i8 [ %valB, %entry ], [ %valB.next, %loop_next ] +; CHECK-NEXT: %[[RES1A:.*]] = zext i8 %[[RES1]] to i64 +; CHECK-NEXT: %[[RES1B:.*]] = or i64 %[[RES1A]], %c +; CHECK-NEXT: ret i64 %[[RES1B]] +; CHECK: exit2: +; CHECK-NEXT: %[[RES2A:.*]] = zext i8 %[[RES2]] to i64 +; CHECK-NEXT: %[[RES2B:.*]] = or i64 %[[RES2A]], %c +; CHECK-NEXT: ret i64 %[[RES2B]] + +define i64 @phi_multi(i8* %ptrA, i8* %ptrB, i64 %c) { +entry: + %valA = load i8, i8* %ptrA + %valB = load i8, i8* %ptrB + %valB.ext = zext i8 %valB to i64 + %valB.res = or i64 %c, %valB.ext + %cmp1 = icmp eq i8 %valA, %valB + br i1 %cmp1, label %exit1, label %loop_start + +loop_start: + %ptrA.0 = phi i8* [ %ptrA, %entry ], [ %ptrA.next, %loop_next ] + %ptrB.0 = phi i8* [ %ptrB, %entry ], [ %ptrB.next, %loop_next ] + %valA.0 = phi i8 [ %valA, %entry ], [ %valA.next, %loop_next ] + %valB.res.0 = phi i64 [ %valB.res, %entry ], [ %valB.next.res, %loop_next ] + %cmp2 = icmp eq i8 %valA.0, 0 + br i1 %cmp2, label %exit2, label %loop_next + +loop_next: + %ptrA.next = getelementptr inbounds i8, i8* %ptrA.0, i64 1 + %ptrB.next = getelementptr inbounds i8, i8* %ptrB.0, i64 1 + %valA.next = load i8, i8* %ptrA.next + %valB.next = load i8, i8* %ptrB.next + %valB.next.ext = zext i8 %valB.next to i64 + %valB.next.res = or i64 %c, %valB.next.ext + %cmp3 = icmp eq i8 %valA.next, %valB.next + br i1 %cmp3, label %loop_start, label %exit1 + +exit1: + %valB.res.1 = phi i64 [ %valB.res, %entry ], [ %valB.next.res, %loop_next ] + ret i64 %valB.res.1 + +exit2: + ret i64 %valB.res.0 +} + + Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -280,26 +280,26 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) { ; EPILOG: test5( ; EPILOG: exit1.loopexit: -; EPILOG-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], +; EPILOG-NEXT: %iv.pn = phi i64 [ %iv, %loop_exiting ], [ %iv, %loop_exiting ], [ %iv_next, %loop_exiting.1 ], [ %iv_next, %loop_exiting.1 ], [ %iv_next.1, %loop_exiting.2 ], ; EPILOG-NEXT: br label %exit1 ; EPILOG: exit1.loopexit2: -; EPILOG-NEXT: %ivy.epil = add i64 %iv.epil, %add ; EPILOG-NEXT: br label %exit1 ; EPILOG: exit1: -; EPILOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ] +; EPILOG-NEXT: %iv.pn.pn = phi i64 [ %iv.pn, %exit1.loopexit ], [ %iv.epil, %exit1.loopexit2 ] +; EPILOG-NEXT: %result = add i64 %iv.pn.pn, %add ; EPILOG-NEXT: ret i64 %result ; EPILOG: loop_latch.7: ; EPILOG: %niter.nsub.7 = add i64 %niter, -8 ; PROLOG: test5( ; PROLOG: exit1.loopexit: -; PROLOG-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], +; PROLOG-NEXT: %iv.pn = phi i64 [ %iv, %loop_exiting ], [ %iv, %loop_exiting ], [ %iv_next, %loop_exiting.1 ], [ %iv_next, %loop_exiting.1 ], [ %iv_next.1, %loop_exiting.2 ], ; PROLOG-NEXT: br label %exit1 ; PROLOG: exit1.loopexit1: -; PROLOG-NEXT: %ivy.prol = add i64 %iv.prol, %add ; PROLOG-NEXT: br label %exit1 ; PROLOG: exit1: -; PROLOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ] +; PROLOG-NEXT: %iv.pn.pn = phi i64 [ %iv.pn, %exit1.loopexit ], [ %iv.prol, %exit1.loopexit1 ] +; PROLOG-NEXT: %result = add i64 %iv.pn.pn, %add ; PROLOG-NEXT: ret i64 %result ; PROLOG: loop_latch.7: ; PROLOG: %iv_next.7 = add nsw i64 %iv, 8