Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1161,11 +1161,66 @@ return nullptr; } +static Instruction * +shrinkLogicWithZextOpAndPhiOp(BinaryOperator &Logic, + InstCombiner::BuilderTy &Builder) { + // Try to narrow a bitwise logic op with a zexted operand and a phi operand + // that may include the logic op itself and some number of constants: + // LogicOp phi [ C0, C1, ..., LogicOp ], (zext X) --> + // zext (LogicOp' phi [ C0', C1', ..., LogicOp'], X + auto *Phi = dyn_cast(Logic.getOperand(0)); + Value *X; + if (!Phi || !Phi->hasOneUse() || + !match(Logic.getOperand(1), m_OneUse(m_ZExt(m_Value(X))))) + return nullptr; + + // The phi's incoming values must be the logic instruction itself or constants + // that fit in the narrower source type. + Type *DestTy = Logic.getType(); + Type *SrcTy = X->getType(); + SmallVector NarrowOps; + for (Value *PhiVal : Phi->incoming_values()) { + if (auto *C = dyn_cast(PhiVal)) { + Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy); + Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy); + if (ZextTruncC != C) + return nullptr; + NarrowOps.push_back(TruncC); + } else if (PhiVal == &Logic) { + // Use X as a placeholder in the list. We'll replace it with a new narrow + // logic op if this transform works out. + NarrowOps.push_back(X); + } else { + return nullptr; + } + } + + // All incoming values are safe to shrink. Replace the phi and logic op with + // narrower versions. + unsigned NumPhiOps = Phi->getNumOperands(); + assert((NarrowOps.size() == NumPhiOps) && + "Unexpected value while narrowing phi"); + + // Phi nodes must be inserted at the start of a basic block. + Builder.SetInsertPoint(Phi); + PHINode *NarrowPhi = Builder.CreatePHI(SrcTy, NumPhiOps); + Builder.SetInsertPoint(&Logic); + Value *NarrowLogic = Builder.CreateBinOp(Logic.getOpcode(), NarrowPhi, X); + for (unsigned i = 0; i != NumPhiOps; ++i) { + Value *NarrowVal = NarrowOps[i] == X ? NarrowLogic : NarrowOps[i]; + NarrowPhi->addIncoming(NarrowVal, Phi->getIncomingBlock(i)); + } + return new ZExtInst(NarrowLogic, DestTy); +} + /// Fold {and,or,xor} (cast X), Y. Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { auto LogicOpc = I.getOpcode(); assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding"); + if (Instruction *Ret = shrinkLogicWithZextOpAndPhiOp(I, *Builder)) + return Ret; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); CastInst *Cast0 = dyn_cast(Op0); if (!Cast0) Index: test/Transforms/InstCombine/narrow.ll =================================================================== --- test/Transforms/InstCombine/narrow.ll +++ test/Transforms/InstCombine/narrow.ll @@ -97,7 +97,6 @@ ret <2 x i32> %trunc } -; FIXME: ; This is based on an 'any_of' loop construct. ; By narrowing the phi and logic op, we simplify away the zext and the final icmp. @@ -107,19 +106,17 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOUND:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDVAR]] to i64 -; CHECK-NEXT: [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDVAR]] to i64 +; CHECK-NEXT: [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[IDX]], align 4 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LD]], [[NEEDLE:%.*]] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP1]] to i8 -; CHECK-NEXT: [[OR]] = or i8 [[FOUND]], [[ZEXT]] +; CHECK-NEXT: [[TMP2]] = or i1 [[TMP0]], [[CMP1]] ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[OR]], 0 -; CHECK-NEXT: ret i1 [[TOBOOL]] +; CHECK-NEXT: ret i1 [[TMP2]] ; entry: br label %loop @@ -141,7 +138,6 @@ ret i1 %tobool } -; FIXME: ; This is based on an 'all_of' loop construct. ; By narrowing the phi and logic op, we simplify away the zext and the final icmp. @@ -151,18 +147,16 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOUND:%.*]] = phi i8 [ 1, [[ENTRY]] ], [ [[AND:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[INDVAR]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[IDX]], align 4 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LD]], [[HAY:%.*]] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP1]] to i8 -; CHECK-NEXT: [[AND]] = and i8 [[FOUND]], [[ZEXT]] +; CHECK-NEXT: [[TMP1]] = and i1 [[TMP0]], [[CMP1]] ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0 -; CHECK-NEXT: ret i1 [[TOBOOL]] +; CHECK-NEXT: ret i1 [[TMP1]] ; entry: br label %loop @@ -184,7 +178,6 @@ ret i1 %tobool } -; FIXME: ; Narrowing should work with an 'xor' and is not limited to bool types. define i32 @shrinkLogicAndPhi1(i8 %x, i1 %cond) { @@ -194,9 +187,9 @@ ; CHECK: if: ; CHECK-NEXT: br label [[ENDIF]] ; CHECK: endif: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32 -; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i8 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], [[X:%.*]] +; CHECK-NEXT: [[LOGIC:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[LOGIC]] ; entry: @@ -211,8 +204,6 @@ } ; FIXME: -; Narrowing should work with an 'xor' and is not limited to bool types. -; FIXME: ; We should either canonicalize based on complexity or enhance the pattern matching to catch this commuted variant. define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) {