Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -3944,27 +3944,57 @@ return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); } -/// See if we can fold the given phi. If not, returns null. static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { - // If all of the PHI's incoming values are the same then replace the PHI node - // with the common value. + // If in the PHI graph rooted at this PHI node there is only one non-PHI value + // then replace this node with that value. If there's more than one value in + // the graph then we want to find the furthest phi node before we start to see + // any non-PHI values and replace this with that. Value *CommonValue = nullptr; + Value *BestPHINode = nullptr; bool HasUndefInput = false; - for (Value *Incoming : PN->incoming_values()) { - // If the incoming value is the phi node itself, it can safely be skipped. - if (Incoming == PN) continue; - if (isa(Incoming)) { - // Remember that we saw an undef value, but otherwise ignore them. - HasUndefInput = true; - continue; + bool HasBranchingPHI = false; + std::set PHINodesSeen; + std::set PendingPHINodes = { PN }; + while (!PendingPHINodes.empty()) { + PHINode *ThisPN = *PendingPHINodes.begin(); + PendingPHINodes.erase(ThisPN); + PHINodesSeen.insert(ThisPN); + // If there's some other phi node waiting to be processed then we're looking + // at a branch in the phi graph. + if (!PendingPHINodes.empty()) + HasBranchingPHI = true; + // If we haven't yet found any non-phi value and this phi dominates PN then + // this phi is better than PN, though we can't make use of it if we've + // detected a branch in the phi graph as we don't know if all paths from + // here reach PN. + // FIXME: we can know that if we make sure to traverse the phi graph in the + // correct order. + if (ThisPN != PN && !HasBranchingPHI && !CommonValue && + ValueDominatesPHI(ThisPN, PN, Q.DT)) + BestPHINode = ThisPN; + for (Value *Incoming : ThisPN->incoming_values()) { + if (PHINode *IncomingPN = dyn_cast(Incoming)) { + // If the incoming value is a phi node we've already seen it can safely + // be skipped, otherwise add it to the set of phi nodes we need to + // examine. + if (!PHINodesSeen.count(IncomingPN)) + PendingPHINodes.insert(IncomingPN); + continue; + } + if (isa(Incoming)) { + // Remember that we saw an undef value, but otherwise ignore them. + HasUndefInput = true; + continue; + } + if (CommonValue && Incoming != CommonValue) { + return BestPHINode; // Not the same, bail out. + } + CommonValue = Incoming; } - if (CommonValue && Incoming != CommonValue) - return nullptr; // Not the same, bail out. - CommonValue = Incoming; - } + }; // If CommonValue is null then all of the incoming values were either undef or - // equal to the phi node itself. + // equal to the phi node itself, or there is no non-phi in this phi graph. if (!CommonValue) return UndefValue::get(PN->getType()); @@ -3972,7 +4002,7 @@ // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : BestPHINode; return CommonValue; } Index: test/Analysis/BasicAA/phi-aa.ll =================================================================== --- test/Analysis/BasicAA/phi-aa.ll +++ test/Analysis/BasicAA/phi-aa.ll @@ -77,4 +77,39 @@ declare void @inc(i32*) - +; When we have a chain of phis in nested loops we should recognise if there's +; actually only one underlying value. +; CHECK-LABEL: loop_phi_chain +; CHECK: NoAlias: i32* %val1, i32* @Y +; CHECK: NoAlias: i32* %val2, i32* @Y +; CHECK: NoAlias: i32* %val3, i32* @Y +define void @loop_phi_chain(i32 %a, i32 %b, i32 %c) { +entry: + br label %loop1 + +loop1: + %n1 = phi i32 [ 0, %entry ], [ %add1, %loop2 ] + %val1 = phi i32* [ @X, %entry ], [ %val2, %loop2 ] + %add1 = add i32 %n1, 1 + %cmp1 = icmp ne i32 %n1, 32 + br i1 %cmp1, label %loop2, label %end + +loop2: + %n2 = phi i32 [ 0, %loop1 ], [ %add2, %loop3 ] + %val2 = phi i32* [ %val1, %loop1 ], [ %val3, %loop3 ] + %add2 = add i32 %n2, 1 + %cmp2 = icmp ne i32 %n2, 32 + br i1 %cmp2, label %loop3, label %loop1 + +loop3: + %n3 = phi i32 [ 0, %loop2 ], [ %add3, %loop3 ] + %val3 = phi i32* [ %val2, %loop2 ], [ %val3, %loop3 ] + store i32 0, i32* %val3, align 4 + store i32 0, i32* @Y, align 4 + %add3 = add i32 %n3, 1 + %cmp3 = icmp ne i32 %n3, 32 + br i1 %cmp3, label %loop3, label %loop2 + +end: + ret void +} Index: test/Transforms/GVN/pre-after-rle.ll =================================================================== --- /dev/null +++ test/Transforms/GVN/pre-after-rle.ll @@ -0,0 +1,36 @@ +; RUN: opt -gvn -S < %s | FileCheck %s + +declare noalias i8* @malloc(i64) + +; Detecting that %s is fully redundant should let us detect that %w is partially +; redundant. +define void @fn(i32** noalias %start, i32* %width, i32 %h) { +entry: + %call = tail call noalias i8* @malloc(i64 1024) + %call.cast = bitcast i8* %call to i32* + store i32* %call.cast, i32** %start, align 8 + br label %preheader + +preheader: + %cmp = icmp slt i32 1, %h + br i1 %cmp, label %body, label %exit + +; CHECK-LABEL: preheader.body_crit_edge: +; CHECK: load i32, i32* %width, align 8 + +; CHECK-LABEL: body: +; CHECK-NOT: load i32*, i32** %start, align 8 +; CHECK-NOT: load i32, i32* %width, align 8 +body: + %j = phi i32 [ 0, %preheader ], [ %j.next, %body ] + %s = load i32*, i32** %start, align 8 + %idx = getelementptr inbounds i32, i32* %s, i64 0 + store i32 0, i32* %idx, align 4 + %j.next = add nuw nsw i32 %j, 1 + %w = load i32, i32* %width, align 8 + %cmp3 = icmp slt i32 %j.next, %w + br i1 %cmp3, label %body, label %preheader + +exit: + ret void +} Index: test/Transforms/InstSimplify/phi.ll =================================================================== --- test/Transforms/InstSimplify/phi.ll +++ test/Transforms/InstSimplify/phi.ll @@ -22,3 +22,88 @@ %e = icmp eq i32 %d, 0 ret i1 %e } + +define i32* @test2(i32 %a, i32 %b, i32 %c, i32* %ptr) { +entry: + br label %loop1 + +; CHECK-LABEL: loop1: +; CHECK-NOT: phi i32* +loop1: + %n1 = phi i32 [ 0, %entry ], [ %add1, %loop2 ] + %val1 = phi i32* [ %ptr, %entry ], [ %val2, %loop2 ] + %add1 = add i32 %n1, 1 + %cmp1 = icmp ne i32 %n1, 32 + br i1 %cmp1, label %loop2, label %end + +; CHECK-LABEL: loop2: +; CHECK-NOT: phi i32* +loop2: + %n2 = phi i32 [ 0, %loop1 ], [ %add2, %loop3 ] + %val2 = phi i32* [ %val1, %loop1 ], [ %val3, %loop3 ] + %add2 = add i32 %n2, 1 + %cmp2 = icmp ne i32 %n2, 32 + br i1 %cmp2, label %loop3, label %loop1 + +; CHECK-LABEL: loop3: +; CHECK-NOT: phi i32* +; CHECK: store i32 0, i32* %ptr, align 4 +loop3: + %n3 = phi i32 [ 0, %loop2 ], [ %add3, %loop3 ] + %val3 = phi i32* [ %val2, %loop2 ], [ %val3, %loop3 ] + store i32 0, i32* %val3, align 4 + %add3 = add i32 %n3, 1 + %cmp3 = icmp ne i32 %n3, 32 + br i1 %cmp3, label %loop3, label %loop2 + +; CHECK-LABEL: end: +; CHECK: ret i32* %ptr +end: + ret i32* %val1 +} + +define i32* @test3(i32 %a, i32 %b, i32 %c, i32* %ptr) { +entry: + %cmp1 = icmp ne i32 %a, 0 + br i1 %cmp1, label %loop.then, label %test2 + +test2: + %cmp2 = icmp ne i32 %b, 0 + br i1 %cmp2, label %loop.else, label %loop + +; CHECK-LABEL: loop: +; CHECK-NOT: phi i32* +loop: + %n1 = phi i32 [ 0, %test2 ], [ %inc, %loop.end ] + %phi1 = phi i32* [ %ptr, %test2 ], [ %phi4, %loop.end ] + %cmp3 = icmp ne i32 %c, 0 + br i1 %cmp3, label %loop.then, label %loop.else + +; CHECK-LABEL: loop.then: +; CHECK-NOT: phi i32* +loop.then: + %n2 = phi i32 [ 0, %entry ], [ %n1, %loop ] + %phi2 = phi i32* [ %ptr, %entry ], [ %phi1, %loop ] + br label %loop.end + +; CHECK-LABEL: loop.else: +; CHECK-NOT: phi i32* +loop.else: + %n3 = phi i32 [ 0, %test2 ], [ %n1, %loop ] + %phi3 = phi i32* [ %ptr, %test2 ], [ %phi1, %loop ] + br label %loop.end + +; CHECK-LABEL: loop.end: +; CHECK-NOT: phi i32* +loop.end: + %n4 = phi i32 [ %n2, %loop.then ], [ %n3, %loop.else ] + %phi4 = phi i32* [ %phi2, %loop.then ], [ %phi3, %loop.else ] + %inc = add i32 %n4, 1 + %cmp4 = icmp ne i32 %inc, 4 + br i1 %cmp4, label %loop, label %end + +; CHECK-LABEL: end: +; CHECK: ret i32* %ptr +end: + ret i32* %phi4 +} Index: test/Transforms/LoopSimplify/pr28272.ll =================================================================== --- test/Transforms/LoopSimplify/pr28272.ll +++ test/Transforms/LoopSimplify/pr28272.ll @@ -118,7 +118,7 @@ ; CHECK: bb2.loopexit: bb2.loopexit: ; preds = %bb3 - %i.ph = phi i32 [ 0, %bb3 ] + %i.ph = phi i32 [ 1, %bb3 ] br label %bb2 ; CHECK: bb2.outer: Index: test/Transforms/LoopUnroll/rebuild_lcssa.ll =================================================================== --- test/Transforms/LoopUnroll/rebuild_lcssa.ll +++ test/Transforms/LoopUnroll/rebuild_lcssa.ll @@ -24,12 +24,12 @@ br label %L2_header L2_header: - %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ] + %y1 = phi i64 [ 0, %L1_header ], [ %x.lcssa, %L2_latch ] br label %L3_header L3_header: %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ] - %x = add i64 undef, -1 + %x = add i64 %y1, -1 br i1 true, label %L2_latch, label %L3_body L2_latch: @@ -64,12 +64,12 @@ br label %L2_header L2_header: - %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ] + %a = phi i64 [ 0, %L1_header ], [ %dec_us, %L3_header ] br label %L3_header L3_header: %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ] - %dec_us = add i64 undef, -1 + %dec_us = add i64 %a, -1 br i1 true, label %L2_header, label %L3_break_to_L1 ; CHECK: L3_break_to_L1: @@ -89,12 +89,12 @@ } ; CHECK-LABEL: @foo3 -define void @foo3() { +define void @foo3(i8* %p) { entry: br label %L1_header L1_header: - %a = phi i8* [ %b, %L1_latch ], [ null, %entry ] + %a = phi i8* [ %b, %L1_latch ], [ %p, %entry ] br i1 undef, label %L2_header, label %L1_latch L2_header: Index: test/Transforms/LoopVectorize/AArch64/pr36032.ll =================================================================== --- test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -19,7 +19,7 @@ ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ] -; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[G_1_LCSSA:%.*]], [[FOR_COND_CLEANUP]] ] +; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[G_1_LCSSA:%.*]], [[FOR_COND_CLEANUP]] ] ; CHECK-NEXT: [[CMP12:%.*]] = icmp ult i32 [[G_0]], 4 ; CHECK-NEXT: [[CONV:%.*]] = and i32 [[F_0]], 65535 ; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP]] @@ -119,7 +119,7 @@ for.cond: ; preds = %for.cond.cleanup, %entry %f.0 = phi i32 [ 0, %entry ], [ %add5, %for.cond.cleanup ] - %g.0 = phi i32 [ undef, %entry ], [ %g.1.lcssa, %for.cond.cleanup ] + %g.0 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %for.cond.cleanup ] %cmp12 = icmp ult i32 %g.0, 4 %conv = and i32 %f.0, 65535 br i1 %cmp12, label %for.body.lr.ph, label %for.cond.cleanup