Index: lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombinePHI.cpp +++ lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -781,6 +781,112 @@ return ReplaceInstUsesWith(FirstPhi, Undef); } +// If a PHI node has two edges and the PHI node is used in instructions like +// add, sub, mul, div, shifts; if one of incoming values is a constant +// that makes the instruction and identity operation, we can hoist this +// instruction into one of the basic blocks. +// Because of such a transformation, the identity operation won't be +// executed, since it doesn't contribute to the result. +// +static Instruction *PartiallySimplifyIdentityOps(PHINode &PN, const Constant &C, + Value &IncomingVal, + Instruction &PNUser, + InstCombiner &IC) { + if (!PNUser.isBinaryOp()) + return nullptr; + if (PN.getParent() != PNUser.getParent()) + return nullptr; + + // C IncomingVal + // \ / + // \ 0 1 / -- (IncomingValIdx) + // \ / + // PN = phi [C , ...] [ IncomingVal, BB ] + // ... + // 0 1 -- (OpOperandIdx) + // PNUser = op PN, x + + const unsigned IncomingValIdx = + (&IncomingVal == PN.getIncomingValue(0)) ? 0 : 1; + const unsigned OpOperandIdx = (&PN == PNUser.getOperand(0)) ? 1 : 0; + + // Exit if not an identity operation. + // For everything except add Add and Mul constant must be on the RHS. + switch (PNUser.getOpcode()) { + default: + return nullptr; + case Instruction::Add: + if (!C.isZeroValue()) + return nullptr; + break; + + case Instruction::Sub: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + if (!C.isZeroValue() || OpOperandIdx == 1) + return nullptr; + break; + + case Instruction::Mul: + if (!C.isOneValue()) + return nullptr; + break; + + case Instruction::UDiv: + case Instruction::SDiv: + if (!C.isOneValue() || OpOperandIdx == 1) + return nullptr; + break; + } + + BasicBlock *BB = PN.getIncomingBlock(IncomingValIdx); + auto *Terminator = BB->getTerminator(); + + // We can only push the instruction to a predecessor if our predecessor is + // guaranteed to have a single, unique successor. + if (BB->getUniqueSuccessor() != PN.getParent()) + return nullptr; + + if (const auto *Incoming = dyn_cast(&IncomingVal)) + if (!IC.getDominatorTree()->dominates(Incoming, Terminator)) + return nullptr; + + // Operand must be available in newly generated instruction and + // as an incoming value of the PHI node. + if (const auto *Operand = + dyn_cast(PNUser.getOperand(OpOperandIdx))) + if (!IC.getDominatorTree()->dominates(Operand, Terminator) || + !IC.getDominatorTree()->dominates(Operand, &PN)) + return nullptr; + + // Ensure that the non-constant value in the PHI node doesn't come + // from the same BasicBlock as the PHI node. This prevents errors + // that could appear with loops (loop backedge could have this + // problem). + if (PN.getIncomingBlock(IncomingValIdx) == PN.getParent()) + return nullptr; + + Value *LHS = &IncomingVal, *RHS = PNUser.getOperand(OpOperandIdx); + if (OpOperandIdx == 0) + std::swap(LHS, RHS); + + // Add new instruction to one of the edges. + IRBuilder<> Builder(Terminator); + auto *NewInst = cast( + Builder.CreateBinOp(cast(PNUser).getOpcode(), LHS, RHS)); + NewInst->copyIRFlags(&PNUser); + + // The new incoming values are: + // - result of the newly emmited instruction + // - operand of the instruction + PN.setIncomingValue(IncomingValIdx, NewInst); + PN.setIncomingValue(IncomingValIdx == 0 ? 1 : 0, + PNUser.getOperand(OpOperandIdx)); + IC.ReplaceInstUsesWith(PNUser, &PN); + return &PN; +} + // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { @@ -822,6 +928,21 @@ PHIUser->user_back() == &PN) { return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } + + // If this phi has one use, exactly 2 edges and one is a constant, we + // may be able to apply the PartiallySimplifyIdentityOps optimization. + if (PN.getNumIncomingValues() == 2) { + const Constant *C = dyn_cast(PN.getIncomingValue(0)); + Value *Val = PN.getIncomingValue(1); + if (!C) { + C = dyn_cast(PN.getIncomingValue(1)); + Val = PN.getIncomingValue(0); + } + if (C && !isa(Val)) + if (auto *I = + PartiallySimplifyIdentityOps(PN, *C, *Val, *PHIUser, *this)) + return I; + } } // We sometimes end up with phi cycles that non-obviously end up being the Index: test/Transforms/InstCombine/phi.ll =================================================================== --- test/Transforms/InstCombine/phi.ll +++ test/Transforms/InstCombine/phi.ll @@ -247,8 +247,9 @@ ret i64 %tmp2 ; CHECK-LABEL: @test12( ; CHECK-NOT: zext +; CHECK: %[[X:.*]] = add i64 %{{.*}}, %Val ; CHECK: end: -; CHECK-NEXT: phi i64 [ 0, %entry ], [ %Val, %two ] +; CHECK-NEXT: phi i64 [ %tmp41, %entry ], [ %[[X]], %two ] ; CHECK-NOT: phi ; CHECK: ret i64 } @@ -630,3 +631,181 @@ %y = phi i32 [ undef, %entry ] ret i32 %y } + +; CHECK-LABEL: @test28( +; CHECK add nsw i32 %x, %dfd +; CHECK: if.end: +; CHECK-NEXT: phi i32 [ %{{.*}}, %if.else ], [ %x, %entry ] +; CHECK-NEXT: ret +define i32 @test28(i1* nocapture readonly %cond, i32* nocapture readonly %a, i32 %x, i32 %v) { +entry: + %0 = load volatile i1, i1* %cond, align 1 + br i1 %0, label %if.else, label %if.end + +if.else: + %1 = load volatile i32, i32* %a, align 4 + br label %if.end + +if.end: + %v.addr.0 = phi i32 [ %1, %if.else ], [ 0, %entry ] + %div = add nsw i32 %x, %v.addr.0 + ret i32 %div +} + +; CHECK-LABEL: @test29( +; CHECK: ashr i32 %x, %1 +; CHECK: if.end: +; CHECK-NEXT: phi i32 [ %{{.*}}, %if.else ], [ %x, %entry ] +; CHECK-NEXT: ret +define i32 @test29(i1* nocapture readonly %cond, i32* nocapture readonly %a, i32 %x, i32 %v) { +entry: + %0 = load volatile i1, i1* %cond, align 1 + br i1 %0, label %if.else, label %if.end + +if.else: + %1 = load volatile i32, i32* %a, align 4 + br label %if.end + +if.end: + %v.addr.0 = phi i32 [ %1, %if.else ], [ 0, %entry ] + %div = ashr i32 %x, %v.addr.0 + ret i32 %div +} + +; CHECK-LABEL: @test30( +; CHECK: sdiv i32 %x, %1 +; CHECK: if.end: +; CHECK-NEXT: phi i32 [ %{{.*}}, %if.else ], [ %x, %entry ] +; CHECK-NEXT: ret +define i32 @test30(i1* nocapture readonly %cond, i32* nocapture readonly %a, i32 %x, i32 %v) { +entry: + %0 = load volatile i1, i1* %cond, align 1 + br i1 %0, label %if.else, label %if.end + +if.else: + %1 = load volatile i32, i32* %a, align 4 + br label %if.end + +if.end: + %v.addr.0 = phi i32 [ %1, %if.else ], [ 1, %entry ] + %div = sdiv i32 %x, %v.addr.0 + ret i32 %div +} + +; CHECK-LABEL: @test31( +; CHECK: mul i32 +; CHECK: if.end: +; CHECK-NEXT: phi i32 [ %{{.*}}, %if.else ], [ %x, %entry ] +; CHECK-NEXT: ret +define i32 @test31(i1* nocapture readonly %cond, i32* nocapture readonly %a, i32 %x, i32 %v) { +entry: + %0 = load volatile i1, i1* %cond, align 1 + br i1 %0, label %if.else, label %if.end + +if.else: + %1 = load volatile i32, i32* %a, align 4 + br label %if.end + +if.end: + %v.addr.0 = phi i32 [ %1, %if.else ], [ 1, %entry ] + %div = mul i32 %x, %v.addr.0 + ret i32 %div +} + +; CHECK-LABEL: @test32( +; CHECK: sdiv i32 %x, %1 +; CHECK: if.end: +; CHECK-NEXT: phi i32 [ %x, %entry ], [ %{{.*}}, %if.else ] +; CHECK-NEXT: ret +define i32 @test32(i1* nocapture readonly %cond, i32* nocapture readonly %a, i32 %x, i32 %v) { +entry: + %0 = load volatile i1, i1* %cond, align 1 + br i1 %0, label %if.else, label %if.end + +if.else: + %1 = load volatile i32, i32* %a, align 4 + br label %if.end + +if.end: + %v.addr.0 = phi i32 [ 1, %entry ], [ %1, %if.else ] + %div = sdiv i32 %x, %v.addr.0 + ret i32 %div +} + +; Constant (0) is on the LHS of shift - should not aplpy the transformation +; CHECK-LABEL: @test33( +; CHECK: if.end: +; CHECK-NEXT: %v.addr.0 = phi i32 [ 0, %entry ], [ %1, %if.else ] +; CHECK-NEXT: shl +; CHECK-NEXT: ret +define i32 @test33(i1* nocapture readonly %cond, i32* nocapture readonly %a, i32 %x, i32 %v) { +entry: + %0 = load volatile i1, i1* %cond, align 1 + br i1 %0, label %if.else, label %if.end + +if.else: + %1 = load volatile i32, i32* %a, align 4 + br label %if.end + +if.end: + %v.addr.0 = phi i32 [ 0, %entry ], [ %1, %if.else ] + %div = shl i32 %v.addr.0, %x + ret i32 %div +} + +; SDiv %div2 is not always executed - shouldn't hoist it into incoming block cond.false +; CHECK-LABEL: @test34( +; CHECK: cond.true: +; CHECK-NEXT: %cond10 = phi i32 [ %div, %cond.false ], [ 1, %entry ] +; CHECK-NEXT: %div2 = sdiv i32 %.pre, %cond10 +; CHECK-NEXT: br label %cond.end +define i32 @test34(i1* nocapture readonly %cond, i1* nocapture readonly %cond2, + i32* nocapture readonly %a, i32* nocapture readonly %b, i32* nocapture %c) { +entry: + %0 = load volatile i32, i32* %a, align 4 + %tobool = icmp eq i32 %0, 0 + %.pre = load volatile i32, i32* %a, align 4 + br i1 %tobool, label %cond.true, label %cond.false + +cond.false: + %div = sdiv i32 %.pre, %0 + %tobool1 = icmp eq i32 %div, 0 + br i1 %tobool1, label %cond.end, label %cond.true + +cond.true: + %cond10 = phi i32 [ %div, %cond.false ], [ 1, %entry ] + %div2 = sdiv i32 %.pre, %cond10 + br label %cond.end + +cond.end: + %cond6 = phi i32 [ %div2, %cond.true ], [ 0, %cond.false ] + store volatile i32 %cond6, i32* %c, align 4 + ret i32 0 +} + +; Don't hoist instruction if incoming edge's terminator is not +; a branch instruction +; CHECK-LABEL: @test35( +; CHECK: entry: +; CHECK-NEXT: %0 = load volatile i32, i32* %a, align 4 +; CHECK-NEXT: %1 = load volatile i32, i32* %b, align 4 +; CHECK-NEXT: switch i32 %0 +; CHECK: sw.epilog: +; CHECK-NEXT: phi i32 [ 0, %sw.bb ], [ %1, %entry ] +define i32 @test35(i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) { +entry: + %0 = load volatile i32, i32* %a, align 4 + %1 = load volatile i32, i32* %b, align 4 + switch i32 %0, label %sw.epilog [ + i32 0, label %sw.bb + ] + +sw.bb: + %2 = load volatile i32, i32* %c, align 4 + br label %sw.epilog + +sw.epilog: + %3 = phi i32 [ 0, %sw.bb ], [ %1, %entry ] + %add = add nsw i32 %1, %3 + ret i32 %add +}