diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3250,8 +3250,6 @@
   case Intrinsic::fshr:
     if (II->getArgOperand(0) == II->getArgOperand(1)) {
       // (rot X, ?) == 0/-1 --> X == 0/-1
-      // TODO: This transform is safe to re-use undef elts in a vector, but
-      //       the constant value passed in by the caller doesn't allow that.
       if (C.isZero() || C.isAllOnes())
         return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
     }
@@ -3402,6 +3400,8 @@
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   Constant *RHSC = dyn_cast<Constant>(Op1);
   Instruction *LHSI = dyn_cast<Instruction>(Op0);
+  const ICmpInst::Predicate Pred = I.getPredicate();
+
   if (!RHSC || !LHSI)
     return nullptr;
 
@@ -3430,7 +3430,6 @@
           I.getPredicate(), LHSI->getOperand(0),
           Constant::getNullValue(LHSI->getOperand(0)->getType()));
     break;
-
   case Instruction::Load:
     // Try to optimize things like "A[i] > 4" to index computations.
     if (GetElementPtrInst *GEP =
@@ -3442,6 +3441,25 @@
     break;
   }
 
+  if (auto *II = dyn_cast<IntrinsicInst>(I.getOperand(0))) {
+    switch (II->getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::fshl:
+    case Intrinsic::fshr:
+      if (II->getArgOperand(0) == II->getArgOperand(1)) {
+        // (rot X, ?) == 0/-1 --> X == 0/-1
+        // Safe to re-use undef elts in a vector, but only valid for eq/ne and
+        // only when RHSC is a splat (getSplatValue returns null otherwise).
+        if (ICmpInst::isEquality(Pred) && RHSC->getType()->isVectorTy())
+          if (Constant *Splat = RHSC->getSplatValue(/*AllowUndefs*/ true))
+            if (Splat->isZeroValue() || Splat->isAllOnesValue())
+              return new ICmpInst(Pred, II->getArgOperand(0), Op1);
+      }
+      break;
+    }
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-fsh.ll b/llvm/test/Transforms/InstCombine/icmp-fsh.ll
--- a/llvm/test/Transforms/InstCombine/icmp-fsh.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-fsh.ll
@@ -53,12 +53,19 @@
   ret <2 x i1> %r
 }
 
-; TODO: We filter out vector constants with undef elts, but that isn't needed for this transform.
+define <2 x i1> @rotl_ne_n0_undef(<2 x i5> %x, <2 x i5> %y) {
+; CHECK-LABEL: @rotl_ne_n0_undef(
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], <i5 0, i5 undef>
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %rot = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5>%x, <2 x i5> %x, <2 x i5> %y)
+  %r = icmp ne <2 x i5> %rot, <i5 0, i5 undef>
+  ret <2 x i1> %r
+}
 
 define <2 x i1> @rotl_ne_n1_undef(<2 x i5> %x, <2 x i5> %y) {
 ; CHECK-LABEL: @rotl_ne_n1_undef(
-; CHECK-NEXT:    [[ROT:%.*]] = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> [[X:%.*]], <2 x i5> [[X]], <2 x i5> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i5> [[ROT]], <i5 -1, i5 undef>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], <i5 -1, i5 undef>
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %rot = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5>%x, <2 x i5> %x, <2 x i5> %y)