diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -510,8 +510,48 @@ return nullptr; // Where %B may be optionally shifted: lshr %X, %Z. - Value *X, *Z; - const bool HasShift = match(B, m_OneUse(m_LShr(m_Value(X), m_Value(Z)))); + Value *X; + Constant *Z; + bool HasShift = match(B, m_OneUse(m_LShr(m_Value(X), m_Constant(Z)))); + + // Check that Z is smaller than bitwidth. + // If it isn't, set HasShift to false. + if (HasShift) { + if (auto *CI = dyn_cast(Z)) { + HasShift &= CI->getValue().ult(X->getType()->getIntegerBitWidth()); + } else if (isa(Z)) { + // Simply fold Z to zero. + // Otherwise, MaskB (which is 1 << Z) becomes poison because Z is undef. + Z = ConstantInt::getNullValue(Z->getType()); + } else if (auto *VTy = dyn_cast(Z->getType())) { + unsigned BW = VTy->getElementType()->getIntegerBitWidth(); + SmallVector ZWithoutUndef; + + for (unsigned i = 0; i < VTy->getNumElements(); ++i) { + Constant *Elem = Z->getAggregateElement(i); + if (isa(Elem)) { + ZWithoutUndef.push_back(ConstantInt::get(Elem->getType(), 0)); + continue; + } + + auto *CI = dyn_cast(Elem); + if (!CI) { + HasShift = false; + break; + } + if (CI->getValue().uge(BW)) { + HasShift = false; + break; + } + ZWithoutUndef.push_back(CI); + } + if (HasShift) + Z = ConstantVector::get(ZWithoutUndef); + + } else { + HasShift = false; + } + } if (!HasShift) X = B; diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll --- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -82,7 +82,7 @@ define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) { ; CHECK-LABEL: @and_lshr_and_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[TMP4]] @@ -222,7 +222,7 @@ define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-LABEL: @f_var0_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32> @@ -298,16 +298,16 @@ } ; ============================================================================ ; -; Shift can be a variable, too. +; Shift can't be a variable in general ; ============================================================================ ; define i32 @f_var2(i32 %arg, i32 %arg1) { ; CHECK-LABEL: @f_var2( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, [[ARG1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[ARG]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i32 [[TMP4]], i32 1 ; CHECK-NEXT: ret i32 [[TMP5]] ; %tmp = and i32 %arg, 1 @@ -320,11 +320,11 @@ define <2 x i32> @f_var2_splatvec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var2_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> , [[ARG1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP4]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP5]] ; %tmp = and <2 x i32> %arg, @@ -337,11 +337,11 @@ define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> , [[ARG1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP4]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP5]] ; %tmp = and <2 x i32> %arg, ; mask is not splat @@ -354,11 +354,11 @@ define <3 x i32> @f_var2_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-LABEL: @f_var2_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> , [[ARG1:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or <3 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <3 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = zext <3 x i1> [[TMP4]] to <3 x i32> +; CHECK-NEXT: [[TMP:%.*]] = and <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i32> [[TMP]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[ARG]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = and <3 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[TMP4]], <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[TMP5]] ; %tmp = and <3 x i32> %arg, @@ -375,11 +375,11 @@ define i32 @f_var3(i32 %arg, i32 %arg1, i32 %arg2) { ; CHECK-LABEL: @f_var3( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, [[ARG2:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG1:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[ARG]], [[ARG2:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP5]], i32 1 ; CHECK-NEXT: ret i32 [[TMP6]] ; %tmp = and i32 %arg, %arg1 @@ -393,11 +393,11 @@ ; Should be exactly as the previous one define i32 @f_var3_commutative_and(i32 %arg, i32 %arg1, i32 %arg2) { ; CHECK-LABEL: @f_var3_commutative_and( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, [[ARG2:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG1:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG1:%.*]], [[ARG:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[ARG]], [[ARG2:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP5]], i32 1 ; CHECK-NEXT: ret i32 [[TMP6]] ; %tmp = and i32 %arg1, %arg ; in different order @@ -410,11 +410,11 @@ define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %arg2) { ; CHECK-LABEL: @f_var3_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> , [[ARG2:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[ARG1:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[ARG]], [[ARG2:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP6]] ; %tmp = and <2 x i32> %arg, %arg1 @@ -427,11 +427,11 @@ define <3 x i32> @f_var3_vec_undef(<3 x i32> %arg, <3 x i32> %arg1, <3 x i32> %arg2) { ; CHECK-LABEL: @f_var3_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> , [[ARG2:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = or <3 x i32> [[TMP1]], [[ARG1:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP2]], [[ARG:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <3 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = zext <3 x i1> [[TMP4]] to <3 x i32> +; CHECK-NEXT: [[TMP:%.*]] = and <3 x i32> [[ARG:%.*]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <3 x i32> [[ARG]], [[ARG2:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = and <3 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP5]], <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[TMP6]] ; %tmp = and <3 x i32> %arg, %arg1 @@ -605,10 +605,10 @@ define i32 @n6(i32 %arg) { ; CHECK-LABEL: @n6( ; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: [[TMP1_NOT:%.*]] = icmp eq i32 [[TMP]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[ARG]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1_NOT]], i32 1, i32 [[TMP3]] ; CHECK-NEXT: ret i32 [[TMP4]] ; %tmp = and i32 %arg, 1 @@ -622,9 +622,9 @@ define i32 @n7(i32 %arg) { ; CHECK-LABEL: @n7( ; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: [[TMP1_NOT:%.*]] = icmp eq i32 [[TMP]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1_NOT]], i32 1, i32 [[TMP2]] ; CHECK-NEXT: ret i32 [[TMP3]] ; %tmp = and i32 %arg, 2 @@ -639,10 +639,10 @@ define i32 @n8(i32 %arg) { ; CHECK-LABEL: @n8( ; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: [[TMP1_NOT:%.*]] = icmp eq i32 [[TMP]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[ARG]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1_NOT]], i32 1, i32 [[TMP3]] ; CHECK-NEXT: ret i32 [[TMP4]] ; %tmp = and i32 %arg, 1