Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5631,6 +5631,32 @@
   return nullptr;
 }
 
+/// If one operand of an icmp is effectively a bool (value range of {0,1}),
+/// then try to reduce patterns based on that limit.
+static Instruction *foldICmpUsingBoolRange(ICmpInst &I,
+                                           InstCombiner::BuilderTy &Builder) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+  ICmpInst::Predicate Pred = I.getPredicate();
+
+  // Canonicalize zext operand as Op1.
+  Value *X;
+  if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
+      X->getType()->isIntOrIntVectorTy(1)) {
+    std::swap(Op0, Op1);
+    Pred = I.getSwappedPredicate();
+  }
+
+  // Op0 must be 0 and bool must be true for "ULT":
+  // Op0 <u (zext X) --> Op0 == 0 && X
+  if (Pred == ICmpInst::ICMP_ULT && match(Op1, m_OneUse(m_ZExt(m_Value(X)))) &&
+      X->getType()->isIntOrIntVectorTy(1)) {
+    Value *Eq0 = Builder.CreateIsNull(Op0);
+    return BinaryOperator::CreateAnd(Eq0, X);
+  }
+
+  return nullptr;
+}
+
 llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
 InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
                                                        Constant *C) {
@@ -6058,6 +6084,9 @@
   if (Instruction *Res = foldICmpWithDominatingICmp(I))
     return Res;
 
+  if (Instruction *Res = foldICmpUsingBoolRange(I, Builder))
+    return Res;
+
   if (Instruction *Res = foldICmpUsingKnownBits(I))
     return Res;
 
Index: llvm/test/Transforms/InstCombine/icmp-range.ll
===================================================================
--- llvm/test/Transforms/InstCombine/icmp-range.ll
+++ llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -173,8 +173,8 @@
 
 define i1 @ugt_zext(i1 %b, i8 %x) {
 ; CHECK-LABEL: @ugt_zext(
-; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[B:%.*]] to i8
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[Z]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %z = zext i1 %b to i8
@@ -185,8 +185,8 @@
 define <2 x i1> @ult_zext(<2 x i1> %b, <2 x i8> %p) {
 ; CHECK-LABEL: @ult_zext(
 ; CHECK-NEXT:    [[X:%.*]] = mul <2 x i8> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[X]], [[Z]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[TMP1]], [[B:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %x = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization
@@ -195,6 +195,8 @@
   ret <2 x i1> %r
 }
 
+; negative test - need ult/ugt
+
 define i1 @uge_zext(i1 %b, i8 %x) {
 ; CHECK-LABEL: @uge_zext(
 ; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[B:%.*]] to i8
@@ -206,6 +208,8 @@
   ret i1 %r
 }
 
+; negative test - need ult/ugt
+
 define i1 @ule_zext(i1 %b, i8 %p) {
 ; CHECK-LABEL: @ule_zext(
 ; CHECK-NEXT:    [[X:%.*]] = mul i8 [[P:%.*]], [[P]]
@@ -219,6 +223,8 @@
   ret i1 %r
 }
 
+; negative test - extra use
+
 define i1 @ugt_zext_use(i1 %b, i8 %x) {
 ; CHECK-LABEL: @ugt_zext_use(
 ; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[B:%.*]] to i8
@@ -232,6 +238,8 @@
   ret i1 %r
 }
 
+; negative test - must be zext of i1
+
 define i1 @ult_zext_not_i1(i2 %b, i8 %x) {
 ; CHECK-LABEL: @ult_zext_not_i1(
 ; CHECK-NEXT:    [[Z:%.*]] = zext i2 [[B:%.*]] to i8
@@ -243,11 +251,12 @@
   ret i1 %r
 }
 
+; sub is eliminated
+
 define i1 @sub_ult_zext(i1 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ult_zext(
-; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[B:%.*]] to i8
-; CHECK-NEXT:    [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult i8 [[S]], [[Z]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP1]], [[B:%.*]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %z = zext i1 %b to i8
@@ -256,6 +265,8 @@
   ret i1 %r
 }
 
+; negative test - must be zext of i1
+
 define i1 @sub_ult_zext_not_i1(i2 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ult_zext_not_i1(
 ; CHECK-NEXT:    [[Z:%.*]] = zext i2 [[B:%.*]] to i8
@@ -269,6 +280,8 @@
   ret i1 %r
 }
 
+; negative test - extra use (but we could try harder to fold this)
+
 define i1 @sub_ult_zext_use1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ult_zext_use1(
 ; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[B:%.*]] to i8
@@ -286,10 +299,10 @@
 
 define <2 x i1> @zext_ugt_sub_use2(<2 x i1> %b, <2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @zext_ugt_sub_use2(
-; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
 ; CHECK-NEXT:    [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[S]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[S]], [[Z]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[TMP1]], [[B:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %z = zext <2 x i1> %b to <2 x i8>