diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9318,6 +9318,63 @@ continue; OpsChanged |= tryToVectorize(I, R); } + // Try to vectorize list of compares. + // Sort by type, compare predicate, etc. + // TODO: Add analysis on the operand opcodes (profitable to vectorize + // instructions with same/alternate opcodes/const values). + auto &&CompareSorter = [&R](Value *V, Value *V2) { + auto *CI1 = cast(V); + auto *CI2 = cast(V2); + if (R.isDeleted(CI2) || !isValidElementType(CI2->getType())) + return false; + if (CI1->getOperand(0)->getType()->getTypeID() < + CI2->getOperand(0)->getType()->getTypeID()) + return true; + if (CI1->getOperand(0)->getType()->getTypeID() > + CI2->getOperand(0)->getType()->getTypeID()) + return false; + return CI1->getPredicate() < CI2->getPredicate() || + (CI1->getPredicate() > CI2->getPredicate() && + CI1->getPredicate() < + CmpInst::getSwappedPredicate(CI2->getPredicate())); + }; + + auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) { + if (V1 == V2) + return true; + auto *CI1 = cast(V1); + auto *CI2 = cast(V2); + if (R.isDeleted(CI2) || !isValidElementType(CI2->getType())) + return false; + if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType()) + return false; + return CI1->getPredicate() == CI2->getPredicate() || + CI1->getPredicate() == + CmpInst::getSwappedPredicate(CI2->getPredicate()); + }; + auto Limit = [&R](Value *V) { + unsigned EltSize = R.getVectorElementSize(V); + return std::max(2U, R.getMaxVecRegSize() / EltSize); + }; + + SmallVector Vals(PostponedCmps.begin(), PostponedCmps.end()); + OpsChanged |= tryToVectorizeSequence( + Vals, Limit, CompareSorter, AreCompatibleCompares, + [this, &R](ArrayRef Candidates, bool LimitForRegisterSize) { + // Exclude possible reductions from other blocks. + bool ArePossiblyReducedInOtherBlock = + any_of(Candidates, [](Value *V) { + return any_of(V->users(), [V](User *U) { + return isa(U) && + cast(U)->getParent() != + cast(V)->getParent(); + }); + }); + if (ArePossiblyReducedInOtherBlock) + return false; + return tryToVectorizeList(Candidates, R, LimitForRegisterSize); + }, + /*LimitForRegisterSize=*/true); Instructions.clear(); } else { // Insert in reverse order since the PostponedCmps vector was filled in diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -420,23 +420,22 @@ define float @test_merge_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_merge_anyof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[T]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[T]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[T]], i32 0 -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[T_FR]], -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 -; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP11]] -; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP14]] -; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP17]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[TMP0]], 255 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP20]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 +; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 +; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 +; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 +; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]] +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]] ; CHECK-NEXT: ret float [[RETVAL_0]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -279,19 +279,26 @@ ; CHECK-LABEL: @cmp_lt_gt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]] -; CHECK-NEXT: [[ADD:%.*]] = fsub double [[C:%.*]], [[B]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00 -; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[ADD]], [[MUL]] -; CHECK-NEXT: [[SUB:%.*]] = fsub double [[FNEG]], [[C]] -; CHECK-NEXT: [[DIV3:%.*]] = fdiv double [[SUB]], [[MUL]] -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[DIV]], 0x3EB0C6F7A0B5ED8D -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[DIV3]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP4]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[CMP5:%.*]] = fcmp ule double [[DIV]], 1.000000e+00 -; CHECK-NEXT: [[CMP7:%.*]] = fcmp ule double [[DIV3]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP5]], i1 true, i1 [[CMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[TMP12]], i1 true, i1 [[TMP11]] ; CHECK-NEXT: br label [[CLEANUP]] ; CHECK: cleanup: ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[OR_COND1]], [[LOR_LHS_FALSE]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -132,6 +132,10 @@ ; MINTREESIZE-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 ; MINTREESIZE-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 ; MINTREESIZE-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0 +; MINTREESIZE-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP3]], i32 0 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> [[TMP1]], i1 [[CMP2]], i32 1 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> [[TMP2]], i1 [[CMP1]], i32 2 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[CMP0]], i32 3 ; MINTREESIZE-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] ; MINTREESIZE-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] ; MINTREESIZE-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] @@ -142,19 +146,19 @@ ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 -; MINTREESIZE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Q0]], i32 0 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Q1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q0]], i32 0 +; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q1]], i32 1 ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[Q3]], i32 1 +; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0 +; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q3]], i32 1 ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] -; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 +; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] -; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 +; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]]) ; MINTREESIZE-NEXT: ret <4 x float> undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -167,6 +167,10 @@ ; MINTREESIZE-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 ; MINTREESIZE-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 ; MINTREESIZE-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0 +; MINTREESIZE-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP3]], i32 0 +; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> [[TMP1]], i1 [[CMP2]], i32 1 +; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> [[TMP2]], i1 [[CMP1]], i32 2 +; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[CMP0]], i32 3 ; MINTREESIZE-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] ; MINTREESIZE-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] ; MINTREESIZE-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] @@ -177,19 +181,19 @@ ; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 ; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 -; MINTREESIZE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Q0]], i32 0 -; MINTREESIZE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Q1]], i32 1 +; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q0]], i32 0 +; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q1]], i32 1 ; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 ; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 -; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0 -; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[Q3]], i32 1 +; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q2]], i32 0 +; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q3]], i32 1 ; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] ; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] -; MINTREESIZE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 -; MINTREESIZE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0 +; MINTREESIZE-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] -; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 -; MINTREESIZE-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[Q5]], i32 1 +; MINTREESIZE-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[Q6]], i32 0 +; MINTREESIZE-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[Q5]], i32 1 ; MINTREESIZE-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] ; MINTREESIZE-NEXT: call void @llvm.assume(i1 [[QI]]) ; MINTREESIZE-NEXT: ret <4 x float> undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s -; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s +; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX declare void @use1(i1) @@ -93,19 +93,36 @@ } define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) { -; CHECK-LABEL: @logical_and_icmp_diff_preds( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0 -; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; CHECK-NEXT: ret i1 [[S3]] +; SSE-LABEL: @logical_and_icmp_diff_preds( +; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 +; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 +; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 +; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0 +; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 +; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X3]], i32 0 +; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X1]], i32 1 +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer +; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP4]], i1 false +; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false +; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false +; SSE-NEXT: ret i1 [[S3]] +; +; AVX-LABEL: @logical_and_icmp_diff_preds( +; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 +; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 +; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 +; AVX-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0 +; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0 +; AVX-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 +; AVX-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0 +; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false +; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false +; AVX-NEXT: ret i1 [[S3]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -144,17 +161,14 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) { ; CHECK-LABEL: @mixed_logical_icmp( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp sgt i32 [[X0]], 0 -; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[X1]], 0 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X3]], 0 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[C2]] -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false ; CHECK-NEXT: ret i1 [[S3]] ; %x0 = extractelement <4 x i32> %x, i32 0 @@ -200,21 +214,18 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[X]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[X]], -; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[TMP4]], 17 -; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[TMP3]], 17 -; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[TMP2]], 17 -; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[TMP1]], 17 -; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) -; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP7]], i1 [[D0]], i1 false -; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false -; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP4]], i1 [[TMP5]], i1 false +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 +; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP6]], i1 false +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 +; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP7]], i1 false +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 +; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP8]], i1 false ; CHECK-NEXT: ret i1 [[S7]] ; %x0 = extractelement <4 x i32> %x, i32 0