diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7889,11 +7889,11 @@ VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast(VL0)) { - VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, - Builder.getInt1Ty(), + auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); + VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, CI0->getPredicate(), CostKind, VL0); VecCost += TTI->getCmpSelInstrCost( - E->getOpcode(), ScalarTy, Builder.getInt1Ty(), + E->getOpcode(), VecTy, MaskTy, cast(E->getAltOp())->getPredicate(), CostKind, E->getAltOp()); } else { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll @@ -4,9 +4,16 @@ define i1 @degenerate() { ; CHECK-LABEL: define i1 @degenerate() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]]) -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x fp128> zeroinitializer, i32 0 +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt fp128 [[TMP0]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[CMP3:%.*]] = fcmp olt fp128 [[TMP0]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x fp128> zeroinitializer, i32 0 +; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt fp128 [[TMP1]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[OR_COND29:%.*]] = select i1 [[OR_COND]], i1 [[CMP6]], i1 false +; CHECK-NEXT: [[CMP10:%.*]] = fcmp olt fp128 [[TMP1]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[OR_COND30:%.*]] = select i1 [[OR_COND29]], i1 [[CMP10]], i1 false +; CHECK-NEXT: ret i1 [[OR_COND30]] ; entry: %0 = extractelement <4 x fp128> zeroinitializer, i32 0 @@ -25,13 +32,16 @@ ; CHECK-LABEL: define i1 @with_inputs ; CHECK-SAME: (<4 x fp128> [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x fp128> [[A]], <4 x fp128> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x fp128> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x fp128> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]]) -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x fp128> [[A]], i32 0 +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt fp128 [[TMP0]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[CMP3:%.*]] = fcmp olt fp128 [[TMP0]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x fp128> [[A]], i32 1 +; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt fp128 [[TMP1]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[OR_COND29:%.*]] = select i1 [[OR_COND]], i1 [[CMP6]], i1 false +; CHECK-NEXT: [[CMP10:%.*]] = fcmp olt fp128 [[TMP1]], 0xL00000000000000000000000000000000 +; CHECK-NEXT: [[OR_COND30:%.*]] = select i1 [[OR_COND29]], i1 [[CMP10]], i1 false +; CHECK-NEXT: ret i1 [[OR_COND30]] ; entry: %0 = extractelement <4 x fp128> %a, i32 0