Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1397,6 +1397,87 @@ return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty); } +// FoldPHIArgOrIntoPHI finds a phi node, which has OR instruction +// as an incoming value and AND instruction as its use. +// If the OR and AND instructions take the same operand +// as (%A OR %B) AND %B then replace the incoming value by %B. +// For example, +// BB1: +// %or = or i64 %val, 1 +// br %BB2 +// BB2: +// %phi = phi i64 [ %or, %BB1 ], ... # -> phi i64 [ 1, %BB1 ], ... +// %and = and i64 %phi, 1 +// This optimization allows jump threading to find the opportunity +// in method call whose return value is std::pair or . +Instruction *InstCombiner::FoldPHIArgOrIntoPHI(BinaryOperator &AndInst) { + PHINode *Phi = dyn_cast(AndInst.getOperand(0)); + if (Phi == nullptr) + return nullptr; + + Value *AndVal = AndInst.getOperand(1); + auto IsEligible = [&](Value *V) { + return match(V, m_Or(m_Value(), m_Specific(AndVal))); + }; + if (llvm::none_of(Phi->incoming_values(), IsEligible)) + return nullptr; + + PHINode* NewPhi = Phi; + if (!Phi->hasOneUse()) { + // When Phi is used by other instruction, we need to create a new phi + // node for this AND instruction. To avoid increasing total code size, + // we create a new phi if the AND can be eliminated later. + // When all incoming values are created by concatenating two integers, + // e.g. (Shl(x, 32) OR ZExt(y)) for creating i64 from two i32, + // and the AND instruction is used to extract one of them, + // we can eliminate the AND instruction later by SimplifyDemandedBits. + if (!isa(AndVal)) + return nullptr; + + APInt Mask = APInt(Phi->getType()->getPrimitiveSizeInBits(), + cast(AndVal)->getZExtValue()); + auto IsExtract = [&](Value *V) { + if (isa(V)) + return true; + // If all non-zero bits of the mask is zero for LHS(RHS) and + // all zero bits of the mask is already known to be zero in RHS(LHS), + // then the AND instruction is just for extracting RHS as is. + Value *LHS = nullptr, *RHS = nullptr; + if (match(V, m_Or(m_Value(LHS), m_Value(RHS)))) { + // Avoid computeKnownBits for a trivial case. + if (RHS == AndVal || LHS == AndVal) + return true; + APInt LHSZero = computeKnownBits(LHS, 0, &AndInst).Zero; + APInt RHSZero = computeKnownBits(RHS, 0, &AndInst).Zero; + return ((Mask.isSubsetOf(LHSZero) && + (~Mask).isSubsetOf(RHSZero)) || + (Mask.isSubsetOf(RHSZero) && + (~Mask).isSubsetOf(LHSZero))); + } + return false; + }; + + if (!llvm::all_of(Phi->incoming_values(), IsExtract)) + return nullptr; + + // If there is another use of the phi node, we create a new one + // for this AND instruction by cloning the original phi node. + NewPhi = cast(Phi->clone()); + InsertNewInstBefore(NewPhi, *Phi); + AndInst.setOperand(0, NewPhi); + } + + // We replace the incoming OR with AndVal. + for (unsigned Idx = 0; Idx < NewPhi->getNumIncomingValues(); Idx++) { + Value *V = NewPhi->getIncomingValue(Idx); + if (match(V, m_Or(m_Value(), m_Specific(AndVal)))) + NewPhi->setIncomingValue(Idx, AndVal); + } + Worklist.Add(NewPhi); + + return &AndInst; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -1426,6 +1507,10 @@ if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); + // Try to fold ((A|B)&B) code sequence over Phi. + if (Instruction *Inst = FoldPHIArgOrIntoPHI(I)) + return Inst; + const APInt *C; if (match(Op1, m_APInt(C))) { Value *X, *Y; Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -701,6 +701,8 @@ Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN); + Instruction *FoldPHIArgOrIntoPHI(BinaryOperator &I); + /// If an integer typed PHI has only one use which is an IntToPtr operation, /// replace the PHI with an existing pointer typed PHI if it exists. Otherwise /// insert a new pointer typed PHI and replace the original one. Index: test/Transforms/InstCombine/fold-or-phi.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/fold-or-phi.ll @@ -0,0 +1,202 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define signext i64 @test1(i1 %f, i64 signext %a, i64 signext %b) { +; CHECK-LABEL: @test1 +; CHECK-LABEL: BB2 +; CHECK: [[PHI:%.*]] = phi i64 [ %a, %entry ], [ %b, %BB1 ] +; CHECK: and i64 [[PHI]], %b +entry: + br i1 %f, label %BB1, label %BB2 + +BB1: + %or = or i64 %a, %b + br label %BB2 + +BB2: + %phi = phi i64 [ %a, %entry ], [ %or, %BB1 ] + %and = and i64 %phi, %b + ret i64 %and +} + +define signext i64 @test2(i1 %f, i64 signext %a, i64 signext %b) { +; a test case for not creating a clone phi node to avoid code size bloat +; CHECK-LABEL: @test2 +; CHECK-LABEL: BB2 +; CHECK: [[NewPHI:%.*]] = phi i64 [ %b, %entry ], [ %or, %BB1 ] +entry: + br i1 %f, label %BB1, label %BB2 + +BB1: + %or = or i64 %a, 1 + br label %BB2 + +BB2: + %phi = phi i64 [ %b, %entry ], [ %or, %BB1 ] + %and = and i64 %phi, 1 + %add = add i64 %and, %phi + ret i64 %add +} + +define signext i64 @test3(i1 %f, i32 signext %a, i64 signext %b) { +; a test case for creating a clone phi node +; CHECK-LABEL: @test3 +; CHECK-LABEL: BB3 +; CHECK: [[PHI:%.*]] = phi i64 [ %2, %BB1 ], [ %b, %BB2 ] +; CHECK: and i64 [[PHI]], %b +entry: + br i1 %f, label %BB1, label %BB2 + +BB1: + %0 = zext i32 %a to i64 + %1 = shl nuw i64 %0, 32 + %2 = or i64 %1, 1 + br label %BB3 + +BB2: + %3 = zext i32 %a to i64 + %4 = shl nuw i64 %3, 32 + %5 = or i64 %4, %b + br label %BB3 + +BB3: + %phi = phi i64 [ %2, %BB1 ], [ %5, %BB2 ] + %and = and i64 %phi, %b + %add = add i64 %and, %b + ret i64 %add +} + +define signext i32 @test4(i1 %f, i16 signext %a, i32 signext %b) { +; a test case for creating a clone phi node +; CHECK-LABEL: @test4 +; CHECK-LABEL: BB3 +; CHECK: [[PHI:%.*]] = phi i32 [ %2, %BB1 ], [ %b, %BB2 ] +; CHECK: and i32 [[PHI]], %b +entry: + br i1 %f, label %BB1, label %BB2 + +BB1: + %0 = zext i16 %a to i32 + %1 = shl nuw i32 %0, 16 + %2 = or i32 %1, 1 + br label %BB3 + +BB2: + %3 = zext i16 %a to i32 + %4 = shl nuw i32 %3, 16 + %5 = or i32 %4, %b + br label %BB3 + +BB3: + %phi = phi i32 [ %2, %BB1 ], [ %5, %BB2 ] + %and = and i32 %phi, %b + %add = add i32 %and, %b + ret i32 %add +} + +define signext i32 @testBI(i32 signext %v) { +; Test with std::pair +; based on the following C++ code +; std::pair callee(int v) { +; int a = dummy(v); +; if (a) return std::make_pair(true, dummy(a)); +; else return std::make_pair(v < 0, v); +; } +; int func(int v) { +; std::pair rc = callee(v); +; if (rc.first) dummy(0); +; return rc.second; +; } + +; CHECK-LABEL: @testBI +; CHECK-LABEL: _ZL6calleei.exit +; CHECK: [[PHI:%.*]] = phi i1 [ false, %if.then.i ], [ [[V:%.*]], %if.else.i ] +; CHECK: br i1 [[PHI]], label %if.end, label %if.then +entry: + %call.i = call signext i32 @dummy(i32 signext %v) + %tobool.i = icmp eq i32 %call.i, 0 + br i1 %tobool.i, label %if.else.i, label %if.then.i + +if.then.i: ; preds = %entry + %call2.i = call signext i32 @dummy(i32 signext %call.i) + %retval.sroa.22.0.insert.ext.i.i = zext i32 %call2.i to i64 + %retval.sroa.22.0.insert.shift.i.i = shl nuw i64 %retval.sroa.22.0.insert.ext.i.i, 32 + %retval.sroa.0.0.insert.insert.i.i = or i64 %retval.sroa.22.0.insert.shift.i.i, 1 + br label %_ZL6calleei.exit + +if.else.i: ; preds = %entry + %.lobit.i = lshr i32 %v, 31 + %0 = zext i32 %.lobit.i to i64 + %retval.sroa.22.0.insert.ext.i8.i = zext i32 %v to i64 + %retval.sroa.22.0.insert.shift.i9.i = shl nuw i64 %retval.sroa.22.0.insert.ext.i8.i, 32 + %retval.sroa.0.0.insert.insert.i11.i = or i64 %retval.sroa.22.0.insert.shift.i9.i, %0 + br label %_ZL6calleei.exit + +_ZL6calleei.exit: ; preds = %if.then.i, %if.else.i + %retval.sroa.0.0.i = phi i64 [ %retval.sroa.0.0.insert.insert.i.i, %if.then.i ], [ %retval.sroa.0.0.insert.insert.i11.i, %if.else.i ] + %rc.sroa.43.0.extract.shift = lshr i64 %retval.sroa.0.0.i, 32 + %rc.sroa.43.0.extract.trunc = trunc i64 %rc.sroa.43.0.extract.shift to i32 + %1 = and i64 %retval.sroa.0.0.i, 1 + %tobool = icmp eq i64 %1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %_ZL6calleei.exit + %call1 = call signext i32 @dummy(i32 signext 0) + br label %if.end + +if.end: ; preds = %_ZL6calleei.exit, %if.then + ret i32 %rc.sroa.43.0.extract.trunc +} + +define signext i32 @testIB(i32 signext %v) { +; Test with std::pair +; based on the following C++ code +; std::pair callee(int v) { +; int a = dummy(v); +; if (a) return std::make_pair(dummy(v), true); +; else return std::make_pair(v, v < 0); +; } +; int func(int v) { +; std::pair rc = callee(v); +; if (rc.second) dummy(0); +; return rc.first; +; } + +; CHECK-LABEL: @testIB +; CHECK-LABEL: _ZL6calleei.exit +; CHECK: [[PHI:%.*]] = phi i1 [ false, %if.then.i ], [ [[V:%.*]], %if.else.i ] +; CHECK: br i1 [[PHI]], label %if.end, label %if.then +entry: + %call.i = call signext i32 @dummy(i32 signext %v) + %tobool.i = icmp eq i32 %call.i, 0 + br i1 %tobool.i, label %if.else.i, label %if.then.i + +if.then.i: ; preds = %entry + %call1.i = call signext i32 @dummy(i32 signext %v) + %retval.sroa.0.0.insert.ext.i.i = zext i32 %call1.i to i64 + %retval.sroa.0.0.insert.insert.i.i = or i64 %retval.sroa.0.0.insert.ext.i.i, 4294967296 + br label %_ZL6calleei.exit + +if.else.i: ; preds = %entry + %.lobit.i = lshr i32 %v, 31 + %0 = zext i32 %.lobit.i to i64 + %retval.sroa.2.0.insert.shift.i8.i = shl nuw nsw i64 %0, 32 + %retval.sroa.0.0.insert.ext.i9.i = zext i32 %v to i64 + %retval.sroa.0.0.insert.insert.i10.i = or i64 %retval.sroa.2.0.insert.shift.i8.i, %retval.sroa.0.0.insert.ext.i9.i + br label %_ZL6calleei.exit + +_ZL6calleei.exit: ; preds = %if.then.i, %if.else.i + %retval.sroa.0.0.i = phi i64 [ %retval.sroa.0.0.insert.insert.i.i, %if.then.i ], [ %retval.sroa.0.0.insert.insert.i10.i, %if.else.i ] + %rc.sroa.0.0.extract.trunc = trunc i64 %retval.sroa.0.0.i to i32 + %1 = and i64 %retval.sroa.0.0.i, 4294967296 + %tobool = icmp eq i64 %1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %_ZL6calleei.exit + %call1 = call signext i32 @dummy(i32 signext 0) + br label %if.end + +if.end: ; preds = %_ZL6calleei.exit, %if.then + ret i32 %rc.sroa.0.0.extract.trunc +} + +declare signext i32 @dummy(i32 signext %v)