diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3017,33 +3017,32 @@ /// where X is some kind of instruction. Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) { const APInt *C; - if (!match(Cmp.getOperand(1), m_APInt(C))) - return nullptr; - if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) { - if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C)) - return I; - } + if (match(Cmp.getOperand(1), m_APInt(C))) { + if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) + if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C)) + return I; - // Match against CmpInst LHS being instructions other than binary operators. + if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) + // For now, we only support constant integers while folding the + // ICMP(SELECT)) pattern. We can extend this to support vector of integers + // similar to the cases handled by binary ops above. + if (auto *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1))) + if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS)) + return I; - if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) { - // For now, we only support constant integers while folding the - // ICMP(SELECT)) pattern. We can extend this to support vector of integers - // similar to the cases handled by binary ops above. 
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1))) - if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS)) + if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) + if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C)) return I; - } - if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) { - if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C)) - return I; + if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) + if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C)) + return I; } - if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) - if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C)) - return I; + // There are many transforms where we can allow undef, we can solve it here. + if (match(Cmp.getOperand(1), m_APIntAllowUndef(C))) + return foldICmpInstWithConstantAllowUndef(Cmp, *C); return nullptr; } @@ -3200,12 +3199,6 @@ case Intrinsic::fshl: case Intrinsic::fshr: if (II->getArgOperand(0) == II->getArgOperand(1)) { - // (rot X, ?) == 0/-1 --> X == 0/-1 - // TODO: This transform is safe to re-use undef elts in a vector, but - // the constant value passed in by the caller doesn't allow that. - if (C.isZero() || C.isAllOnes()) - return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1)); - const APInt *RotAmtC; // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC) // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC) @@ -3278,6 +3271,33 @@ return nullptr; } +/// Try to fold integer comparisons with a constant operand: icmp Pred X, C +/// where X is some kind of instruction and C is AllowUndef. +/// TODO: Move more folds which allow undef to this function. +Instruction * +InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, + const APInt &C) { + const ICmpInst::Predicate Pred = Cmp.getPredicate(); + if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::fshl: + case Intrinsic::fshr: + if (Cmp.isEquality() && II->getArgOperand(0) == II->getArgOperand(1)) { + // (rot X, ?) 
== 0/-1 --> X == 0/-1 + // This transform is copied from foldICmpEqIntrinsicWithConstant, + // it is safe to re-use undef elts in a vector. + if (C.isZero() || C.isAllOnes()) + return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1)); + } + break; + } + } + + return nullptr; +} + /// Fold an icmp with BinaryOp and constant operand: icmp Pred BO, C. Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp, BinaryOperator *BO, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -674,6 +674,8 @@ Instruction *foldICmpWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp); + Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, + const APInt &C); Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); diff --git a/llvm/test/Transforms/InstCombine/icmp-fsh.ll b/llvm/test/Transforms/InstCombine/icmp-fsh.ll --- a/llvm/test/Transforms/InstCombine/icmp-fsh.ll +++ b/llvm/test/Transforms/InstCombine/icmp-fsh.ll @@ -53,12 +53,9 @@ ret <2 x i1> %r } -; TODO: We filter out vector constants with undef elts, but that isn't needed for this transform. - define <2 x i1> @rotl_ne_n1_undef(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @rotl_ne_n1_undef( -; CHECK-NEXT: [[ROT:%.*]] = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> [[X:%.*]], <2 x i5> [[X]], <2 x i5> [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[ROT]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rot = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5>%x, <2 x i5> %x, <2 x i5> %y)