Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1480,6 +1480,40 @@ } } + /* + Try to fold a select of bittest. There are two patterns: + %2 = and i32 %X, %Y # Mask A + %3 = icmp eq i32 %2, 0 + %4 = and i32 %X, 1 # Mask B + %5 = select i1 %3, i32 %4, i32 1 + or + %2 = and i32 %X, %Y # Mask A + %3 = icmp eq i32 %2, 0 + %S = lshr i32 %X, C # C u< 32 + %5 = and i32 %S, 1 # Mask B + %6 = select i1 %3, i32 %5, i32 1 + */ + Value *X, *Y; + ICmpInst::Predicate EqPred; + if (match(CondVal, m_ICmp(EqPred, m_And(m_Value(X), m_Value(Y)), m_Zero())) && + ICmpInst::Predicate::ICMP_EQ == EqPred && match(FalseVal, m_One())) { + // The TrueVal can be one of: + // and (lshr %X, C), 1 + // and %X, 1 + Value *C = nullptr; + if (match(TrueVal, m_And(m_Specific(X), m_One())) || + (match(TrueVal, m_And(m_LShr(m_Specific(X), m_Value(C)), m_One())) && + match(C, m_NonNegative()))) { + // These will be folded by this very pass right away. + Value *Shift = (C != nullptr) ? C : ConstantInt::getNullValue(SelType); + Value *MaskB = Builder.CreateShl(ConstantInt::get(SelType, 1), Shift); + Value *FullMask = Builder.CreateOr(Y, MaskB); + Value *MaskedX = Builder.CreateAnd(X, FullMask); + Value *ICmpNeZero = Builder.CreateIsNotNull(MaskedX); + return new ZExtInst(ICmpNeZero, SelType); + } + } + // See if we are selecting two values based on a comparison of the two values. if (FCmpInst *FCI = dyn_cast(CondVal)) { if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { Index: test/Transforms/InstCombine/select-of-bittest.ll =================================================================== --- test/Transforms/InstCombine/select-of-bittest.ll +++ test/Transforms/InstCombine/select-of-bittest.ll @@ -8,12 +8,10 @@ define i32 @and_lshr_and(i32) { ; CHECK-LABEL: @and_lshr_and( -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[TMP4]] ; %2 = and i32 %0, 1 %3 = icmp eq i32 %2, 0 @@ -25,12 +23,10 @@ define <2 x i32> @and_lshr_and_splatvec(<2 x i32>) { ; CHECK-LABEL: @and_lshr_and_splatvec( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %2 = and <2 x i32> %0, %3 = icmp eq <2 x i32> %2, @@ -42,12 +38,10 @@ define <2 x i32> @and_lshr_and_vec_v0(<2 x i32>) { ; CHECK-LABEL: @and_lshr_and_vec_v0( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %2 = and <2 x i32> %0, ; mask is not splat %3 = icmp eq <2 x i32> %2, @@ -59,12 +53,10 @@ define <2 x i32> @and_lshr_and_vec_v1(<2 x i32>) { ; CHECK-LABEL: @and_lshr_and_vec_v1( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %2 = and <2 x i32> %0, %3 = icmp eq <2 x i32> %2, @@ -76,12 +68,10 @@ define <2 x i32> @and_lshr_and_vec_v2(<2 x i32>) { ; CHECK-LABEL: @and_lshr_and_vec_v2( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %2 = and <2 x i32> %0, ; mask is not splat %3 = icmp eq <2 x i32> %2, @@ -110,11 +100,10 @@ define i32 @and_and(i32) { ; CHECK-LABEL: @and_and( -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 1 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: ret i32 [[TMP4]] ; %2 = and i32 %0, 2 %3 = icmp eq i32 %2, 0 @@ -125,11 +114,10 @@ define <2 x i32> @and_and_splatvec(<2 x i32>) { ; CHECK-LABEL: @and_and_splatvec( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %2 = and <2 x i32> %0, %3 = icmp eq <2 x i32> %2, @@ -140,11 +128,10 @@ define <2 x i32> @and_and_vec(<2 x i32>) { ; CHECK-LABEL: @and_and_vec( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %2 = and <2 x i32> %0, ; mask is not splat %3 = icmp eq <2 x i32> %2, @@ -174,12 +161,11 @@ define i32 @f_var0(i32, i32) { ; CHECK-LABEL: @f_var0( -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0:%.*]], [[TMP1:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], i32 [[TMP6]], i32 1 -; CHECK-NEXT: ret i32 [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1:%.*]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[TMP0:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: ret i32 [[TMP6]] ; %3 = and i32 %0, %1 %4 = icmp eq i32 %3, 0 @@ -191,12 +177,11 @@ define <2 x i32> @f_var0_splatvec(<2 x i32>, <2 x i32>) { ; CHECK-LABEL: @f_var0_splatvec( -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP0:%.*]], [[TMP1:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP6]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], [[TMP0:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP6]] ; %3 = and <2 x i32> %0, %1 %4 = icmp eq <2 x i32> %3, @@ -208,12 +193,11 @@ define <2 x i32> @f_var0_vec(<2 x i32>, <2 x i32>) { ; CHECK-LABEL: @f_var0_vec( -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP0:%.*]], [[TMP1:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP6]], <2 x i32> -; CHECK-NEXT: ret <2 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], [[TMP0:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP6]] ; %3 = and <2 x i32> %0, %1 %4 = icmp eq <2 x i32> %3, @@ -242,10 +226,10 @@ define i32 @f_var1(i32, i32) { ; CHECK-LABEL: @f_var1( -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0:%.*]], [[TMP1:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP4]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1:%.*]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[TMP0:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 ; CHECK-NEXT: ret i32 [[TMP6]] ; %3 = and i32 %0, %1 @@ -257,10 +241,10 @@ define <2 x i32> @f_var1_vec(<2 x i32>, <2 x i32>) { ; CHECK-LABEL: @f_var1_vec( -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP0:%.*]], [[TMP1:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], [[TMP0:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP6]] ; %3 = and <2 x i32> %0, %1