Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4654,7 +4654,9 @@ /// *p = /// class HorizontalReduction { - SmallVector ReductionOps; + using ReductionOpsType = SmallVector; + using ReductionOpsListType = SmallVector; + ReductionOpsListType ReductionOps; SmallVector ReducedVals; // Use map vector to make stable output. MapVector ExtraArgs; @@ -4695,6 +4697,37 @@ (Kind == RK_UMin || Kind == RK_UMax))); } + /// Creates reduction operation with the current opcode. + Value *createOp(IRBuilder<> &Builder, const Twine &Name) const { + assert(isVectorizable() && + "Expected add|fadd or min/max reduction operation."); + Value *Cmp; + switch (Kind) { + case RK_Arithmetic: + return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, + Name); + case RK_Min: + Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) + : Builder.CreateFCmpOLT(LHS, RHS); + break; + case RK_Max: + Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) + : Builder.CreateFCmpOGT(LHS, RHS); + break; + case RK_UMin: + assert(Opcode == Instruction::ICmp && "Expected integer types."); + Cmp = Builder.CreateICmpULT(LHS, RHS); + break; + case RK_UMax: + assert(Opcode == Instruction::ICmp && "Expected integer types."); + Cmp = Builder.CreateICmpUGT(LHS, RHS); + break; + case RK_None: + llvm_unreachable("Unknown reduction operation."); + } + return Builder.CreateSelect(Cmp, LHS, RHS, Name); + } + public: explicit OperationData() = default; @@ -4748,6 +4781,30 @@ llvm_unreachable("Reduction kind is not set"); } + /// Checks if the operation has the same parent as \p P. + bool hasSameParent(Instruction *I, Value *P, bool IsRedOp) const { + assert(Kind != RK_None && !!*this && LHS && RHS && + "Expected reduction operation."); + if (!IsRedOp) + return I->getParent() == P; + switch (Kind) { + case RK_Arithmetic: + // Arithmetic reduction operation must be used once only. + return I->getParent() == P; + case RK_Min: + case RK_UMin: + case RK_Max: + case RK_UMax: { + // SelectInst must be used twice while the condition op must have single + // use only. + auto *Cmp = cast(cast(I)->getCondition()); + return I->getParent() == P && Cmp && Cmp->getParent() == P; + } + case RK_None: + break; + } + llvm_unreachable("Reduction kind is not set"); + } /// Expected number of uses for reduction operations/reduced values. unsigned getRequiredNumberOfUses() const { assert(Kind != RK_None && !!*this && LHS && RHS && @@ -4766,6 +4823,45 @@ llvm_unreachable("Reduction kind is not set"); } + /// Initializes the list of reduction operations. + void initReductionOps(ReductionOpsListType &ReductionOps) { + assert(Kind != RK_None && !!*this && LHS && RHS && + "Expected reduction operation."); + switch (Kind) { + case RK_Arithmetic: + ReductionOps.assign(1, ReductionOpsType()); + break; + case RK_Min: + case RK_UMin: + case RK_Max: + case RK_UMax: + ReductionOps.assign(2, ReductionOpsType()); + break; + case RK_None: + llvm_unreachable("Reduction kind is not set"); + } + } + /// Add all reduction operations for the reduction instruction \p I. + void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) { + assert(Kind != RK_None && !!*this && LHS && RHS && + "Expected reduction operation."); + switch (Kind) { + case RK_Arithmetic: + ReductionOps[0].emplace_back(I); + break; + case RK_Min: + case RK_UMin: + case RK_Max: + case RK_UMax: + if (cast(I)->getCondition()->hasOneUse()) + ReductionOps[0].emplace_back(cast(I)->getCondition()); + ReductionOps[1].emplace_back(I); + break; + case RK_None: + llvm_unreachable("Reduction kind is not set"); + } + } + /// Checks if instruction is associative and can be vectorized. bool isAssociative(Instruction *I) const { assert(Kind != RK_None && *this && LHS && RHS && @@ -4834,36 +4930,54 @@ llvm_unreachable("Reduction kind is not set"); } - /// Creates reduction operation with the current opcode. - Value *createOp(IRBuilder<> &Builder, const Twine &Name = "") const { + /// Creates reduction operation with the current opcode with the IR flags + /// from \p ReductionOps. + Value *createOp(IRBuilder<> &Builder, const Twine &Name, + const ReductionOpsListType &ReductionOps) const { assert(isVectorizable() && "Expected add|fadd or min/max reduction operation."); - Value *Cmp; + auto *Op = createOp(Builder, Name); switch (Kind) { case RK_Arithmetic: - return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, - Name); + propagateIRFlags(Op, ReductionOps[0]); + return Op; case RK_Min: - Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) - : Builder.CreateFCmpOLT(LHS, RHS); - break; case RK_Max: - Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) - : Builder.CreateFCmpOGT(LHS, RHS); - break; case RK_UMin: - assert(Opcode == Instruction::ICmp && "Expected integer types."); - Cmp = Builder.CreateICmpULT(LHS, RHS); - break; case RK_UMax: - assert(Opcode == Instruction::ICmp && "Expected integer types."); - Cmp = Builder.CreateICmpUGT(LHS, RHS); + propagateIRFlags(cast(Op)->getCondition(), ReductionOps[0]); + propagateIRFlags(Op, ReductionOps[1]); + return Op; + case RK_None: break; + } + llvm_unreachable("Unknown reduction operation."); + } + /// Creates reduction operation with the current opcode with the IR flags + /// from \p I. + Value *createOp(IRBuilder<> &Builder, const Twine &Name, + Instruction *I) const { + assert(isVectorizable() && + "Expected add|fadd or min/max reduction operation."); + auto *Op = createOp(Builder, Name); + switch (Kind) { + case RK_Arithmetic: + propagateIRFlags(Op, I); + return Op; + case RK_Min: + case RK_Max: + case RK_UMin: + case RK_UMax: + propagateIRFlags(cast(Op)->getCondition(), + cast(I)->getCondition()); + propagateIRFlags(Op, I); + return Op; case RK_None: - llvm_unreachable("Unknown reduction operation."); + break; } - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + llvm_unreachable("Unknown reduction operation."); } + TargetTransformInfo::ReductionFlags getFlags() const { TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; @@ -5000,6 +5114,7 @@ SmallVector, 32> Stack; Stack.push_back(std::make_pair(B, ReductionData.getFirstOperandIndex())); const unsigned NUses = ReductionData.getRequiredNumberOfUses(); + ReductionData.initReductionOps(ReductionOps); while (!Stack.empty()) { Instruction *TreeN = Stack.back().first; unsigned EdgeToVist = Stack.back().second++; @@ -5025,7 +5140,7 @@ markExtraArg(Stack[Stack.size() - 2], TreeN); ExtraArgs.erase(TreeN); } else - ReductionOps.push_back(TreeN); + ReductionData.addReductionOps(TreeN, ReductionOps); } // Retract. Stack.pop_back(); @@ -5043,8 +5158,10 @@ // reduced value class. if (I && (!ReducedValueData || OpData == ReducedValueData || OpData == ReductionData)) { + const bool IsReductionOperation = OpData == ReductionData; // Only handle trees in the current basic block. - if (I->getParent() != B->getParent()) { + if (!ReductionData.hasSameParent(I, B->getParent(), + IsReductionOperation)) { // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; @@ -5058,7 +5175,7 @@ continue; } - if (OpData == ReductionData) { + if (IsReductionOperation) { // We need to be able to reassociate the reduction operations. if (!OpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). @@ -5112,12 +5229,15 @@ // to use it. for (auto &Pair : ExtraArgs) ExternallyUsedValues[Pair.second].push_back(Pair.first); + SmallVector IgnoreList; + for (auto &V : ReductionOps) + IgnoreList.append(V.begin(), V.end()); while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); - V.buildTree(VL, ExternallyUsedValues, ReductionOps); + V.buildTree(VL, ExternallyUsedValues, IgnoreList); if (V.shouldReorder()) { SmallVector Reversed(VL.rbegin(), VL.rend()); - V.buildTree(Reversed, ExternallyUsedValues, ReductionOps); + V.buildTree(Reversed, ExternallyUsedValues, IgnoreList); } if (V.isTreeTinyAndNotFullyVectorizable()) break; @@ -5145,14 +5265,14 @@ // Emit a reduction. Value *ReducedSubTree = - emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps, TTI); + emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); if (VectorizedTree) { Builder.SetCurrentDebugLocation(Loc); OperationData VectReductionData(ReductionData.getOpcode(), VectorizedTree, ReducedSubTree, ReductionData.getKind()); - VectorizedTree = VectReductionData.createOp(Builder, "op.rdx"); - propagateIRFlags(VectorizedTree, ReductionOps); + VectorizedTree = + VectReductionData.createOp(Builder, "op.rdx", ReductionOps); } else VectorizedTree = ReducedSubTree; i += ReduxWidth; @@ -5167,8 +5287,7 @@ OperationData VectReductionData(ReductionData.getOpcode(), VectorizedTree, I, ReductionData.getKind()); - VectorizedTree = VectReductionData.createOp(Builder); - propagateIRFlags(VectorizedTree, ReductionOps); + VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps); } for (auto &Pair : ExternallyUsedValues) { assert(!Pair.second.empty() && @@ -5179,8 +5298,7 @@ OperationData VectReductionData(ReductionData.getOpcode(), VectorizedTree, Pair.first, ReductionData.getKind()); - VectorizedTree = VectReductionData.createOp(Builder, "op.extra"); - propagateIRFlags(VectorizedTree, I); + VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I); } } // Update users. @@ -5264,8 +5382,7 @@ /// \brief Emit a horizontal reduction of the vectorized value. Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder, - unsigned ReduxWidth, ArrayRef RedOps, - const TargetTransformInfo *TTI) { + unsigned ReduxWidth, const TargetTransformInfo *TTI) { assert(VectorizedValue && "Need to have a vectorized tree node"); assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); @@ -5273,7 +5390,7 @@ if (!IsPairwiseReduction) return createSimpleTargetReduction( Builder, TTI, ReductionData.getOpcode(), VectorizedValue, - ReductionData.getFlags(), RedOps); + ReductionData.getFlags(), ReductionOps.back()); Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { @@ -5289,8 +5406,7 @@ "rdx.shuf.r"); OperationData VectReductionData(ReductionData.getOpcode(), LeftShuf, RightShuf, ReductionData.getKind()); - TmpVec = VectReductionData.createOp(Builder, "op.rdx"); - propagateIRFlags(TmpVec, RedOps); + TmpVec = VectReductionData.createOp(Builder, "op.rdx", ReductionOps); } // The result is in the first element of the vector. Index: test/Transforms/SLPVectorizer/X86/horizontal-list.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -330,50 +330,42 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef ; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[TMP5]] +; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef ; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[TMP6]] +; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 ; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef -; CHECK-NEXT: store float [[TMP7]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: store float [[TMP3]], float* @res, align 4 +; CHECK-NEXT: ret float [[TMP3]] ; ; THRESHOLD-LABEL: @bar( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 -; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] +; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef ; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[TMP5]] +; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef ; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[TMP6]] +; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 +; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 ; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef -; THRESHOLD-NEXT: store float [[TMP7]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP7]] +; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[TMP3]] ; entry: %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 Index: test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -36,27 +36,19 @@ ; ; AVX-LABEL: @maxi8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -66,33 +58,25 @@ ; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; AVX-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX-NEXT: ret i32 [[TMP24]] +; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; AVX-NEXT: ret i32 [[TMP16]] ; ; AVX2-LABEL: @maxi8( ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -102,33 +86,25 @@ ; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; AVX2-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX2-NEXT: ret i32 [[TMP24]] +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; AVX2-NEXT: ret i32 [[TMP16]] ; ; SKX-LABEL: @maxi8( ; SKX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; SKX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -138,9 +114,9 @@ ; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; SKX-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; SKX-NEXT: ret i32 [[TMP24]] +; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; SKX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; SKX-NEXT: ret i32 [[TMP16]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -169,101 +145,83 @@ define i32 @maxi16(i32) { ; CHECK-LABEL: @maxi16( -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16 -; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP23]], i32 [[TMP24]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP26]], i32 [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8 -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP29]], i32 [[TMP30]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4 -; CHECK-NEXT: [[TMP34:%.*]] = icmp sgt i32 [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP32]], i32 [[TMP33]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16 -; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP35]], i32 [[TMP36]] -; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4 -; CHECK-NEXT: [[TMP40:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[TMP38]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8 -; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP41]], i32 [[TMP42]] -; CHECK-NEXT: [[TMP45:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4 -; CHECK-NEXT: [[TMP46:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[TMP44]], i32 [[TMP45]] -; CHECK-NEXT: ret i32 [[TMP47]] +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; CHECK-NEXT: ret i32 [[TMP32]] ; ; AVX-LABEL: @maxi16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; AVX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; AVX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -276,57 +234,41 @@ ; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX-NEXT: ret i32 [[TMP48]] +; AVX-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; AVX-NEXT: ret i32 [[TMP32]] ; ; AVX2-LABEL: @maxi16( ; AVX2-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; AVX2-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; AVX2-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -339,57 +281,41 @@ ; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX2-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX2-NEXT: ret i32 [[TMP48]] +; AVX2-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; AVX2-NEXT: ret i32 [[TMP32]] ; ; SKX-LABEL: @maxi16( ; SKX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +; SKX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -402,9 +328,9 @@ ; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; SKX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; SKX-NEXT: ret i32 [[TMP48]] +; SKX-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; SKX-NEXT: ret i32 [[TMP32]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -458,99 +384,67 @@ define i32 @maxi32(i32) { ; CHECK-LABEL: @maxi32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; CHECK-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; CHECK-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; CHECK-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; CHECK-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; CHECK-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; CHECK-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; CHECK-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; CHECK-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; CHECK-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; CHECK-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; CHECK-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; CHECK-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; CHECK-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef +; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef +; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef +; CHECK-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef +; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef +; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef +; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef +; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef +; CHECK-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef +; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef +; CHECK-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef +; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef +; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef +; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef +; CHECK-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef +; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef +; CHECK-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef +; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef +; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef +; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef +; CHECK-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef +; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef +; CHECK-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -566,105 +460,73 @@ ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; CHECK-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; CHECK-NEXT: ret i32 [[TMP96]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef +; CHECK-NEXT: ret i32 [[TMP64]] ; ; AVX-LABEL: @maxi32( ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; AVX-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; AVX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; AVX-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; AVX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; AVX-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; AVX-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; AVX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; AVX-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; AVX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; AVX-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; AVX-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; AVX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; AVX-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; AVX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; AVX-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; AVX-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; AVX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; AVX-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; AVX-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; AVX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; AVX-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; AVX-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; AVX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; AVX-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; AVX-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; AVX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; AVX-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; AVX-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; AVX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; AVX-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; AVX-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; AVX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; AVX-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; AVX-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; AVX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; AVX-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; AVX-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; AVX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; AVX-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; AVX-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; AVX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; AVX-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; AVX-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; AVX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; AVX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef +; AVX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; AVX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef +; AVX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef +; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef +; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef +; AVX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef +; AVX-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef +; AVX-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef +; AVX-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef +; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef +; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef +; AVX-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef +; AVX-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef +; AVX-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef +; AVX-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef +; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef +; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef +; AVX-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef +; AVX-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef +; AVX-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef +; AVX-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef +; AVX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef +; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef +; AVX-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef +; AVX-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef +; AVX-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef +; AVX-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef +; AVX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef +; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef +; AVX-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef +; AVX-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef +; AVX-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -680,105 +542,73 @@ ; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; AVX-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; AVX-NEXT: ret i32 [[TMP96]] +; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef +; AVX-NEXT: ret i32 [[TMP64]] ; ; AVX2-LABEL: @maxi32( ; AVX2-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX2-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; AVX2-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; AVX2-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; AVX2-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; AVX2-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; AVX2-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; AVX2-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; AVX2-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; AVX2-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; AVX2-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; AVX2-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; AVX2-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; AVX2-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; AVX2-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; AVX2-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; AVX2-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; AVX2-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; AVX2-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; AVX2-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; AVX2-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; AVX2-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; AVX2-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; AVX2-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; AVX2-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; AVX2-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; AVX2-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; AVX2-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; AVX2-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; AVX2-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; AVX2-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; AVX2-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; AVX2-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; AVX2-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; AVX2-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; AVX2-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; AVX2-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; AVX2-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; AVX2-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; AVX2-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; AVX2-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; AVX2-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; AVX2-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; AVX2-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; AVX2-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; AVX2-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; AVX2-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef +; AVX2-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; AVX2-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef +; AVX2-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef +; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef +; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef +; AVX2-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef +; AVX2-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef +; AVX2-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef +; AVX2-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef +; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef +; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef +; AVX2-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef +; AVX2-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef +; AVX2-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef +; AVX2-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef +; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef +; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef +; AVX2-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef +; AVX2-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef +; AVX2-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef +; AVX2-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef +; AVX2-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef +; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef +; AVX2-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef +; AVX2-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef +; AVX2-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef +; AVX2-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef +; AVX2-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef +; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef +; AVX2-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef +; AVX2-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef +; AVX2-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -794,105 +624,73 @@ ; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; AVX2-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX2-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; AVX2-NEXT: ret i32 [[TMP96]] +; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX2-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef +; AVX2-NEXT: ret i32 [[TMP64]] ; ; SKX-LABEL: @maxi32( ; SKX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; SKX-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; SKX-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; SKX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; SKX-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; SKX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; SKX-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; SKX-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; SKX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; SKX-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; SKX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; SKX-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; SKX-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; SKX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; SKX-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; SKX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; SKX-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; SKX-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; SKX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; SKX-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; SKX-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; SKX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; SKX-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; SKX-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; SKX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; SKX-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; SKX-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; SKX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; SKX-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; SKX-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; SKX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; SKX-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; SKX-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; SKX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; SKX-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; SKX-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; SKX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; SKX-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; SKX-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; SKX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; SKX-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; SKX-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; SKX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; SKX-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; SKX-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] +; SKX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef +; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef +; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef +; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef +; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef +; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef +; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef +; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef +; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef +; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef +; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef +; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef +; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef +; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef +; SKX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef +; SKX-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef +; SKX-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef +; SKX-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef +; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef +; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef +; SKX-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef +; SKX-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef +; SKX-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef +; SKX-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef +; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef +; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef +; SKX-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef +; SKX-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef +; SKX-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef +; SKX-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef +; SKX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef +; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef +; SKX-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef +; SKX-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef +; SKX-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef +; SKX-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef +; SKX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef +; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef +; SKX-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef +; SKX-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef +; SKX-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -908,9 +706,9 @@ ; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; SKX-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; SKX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; SKX-NEXT: ret i32 [[TMP96]] +; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; SKX-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef +; SKX-NEXT: ret i32 [[TMP64]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -1037,27 +835,19 @@ ; ; AVX-LABEL: @maxf8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -1067,33 +857,25 @@ ; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; AVX-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX-NEXT: ret float [[TMP24]] +; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; AVX-NEXT: ret float [[TMP16]] ; ; AVX2-LABEL: @maxf8( ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -1103,33 +885,25 @@ ; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; AVX2-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX2-NEXT: ret float [[TMP24]] +; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; AVX2-NEXT: ret float [[TMP16]] ; ; SKX-LABEL: @maxf8( ; SKX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] +; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -1139,9 +913,9 @@ ; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; SKX-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; SKX-NEXT: ret float [[TMP24]] +; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; SKX-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; SKX-NEXT: ret float [[TMP16]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -1220,51 +994,35 @@ ; ; AVX-LABEL: @maxf16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; AVX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; AVX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; AVX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; AVX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; AVX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -1277,57 +1035,41 @@ ; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX-NEXT: ret float [[TMP48]] +; AVX-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; AVX-NEXT: ret float [[TMP32]] ; ; AVX2-LABEL: @maxf16( ; AVX2-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; AVX2-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; AVX2-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; AVX2-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; AVX2-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; AVX2-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -1340,57 +1082,41 @@ ; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX2-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX2-NEXT: ret float [[TMP48]] +; AVX2-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; AVX2-NEXT: ret float [[TMP32]] ; ; SKX-LABEL: @maxf16( ; SKX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] +; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; SKX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; SKX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; SKX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; SKX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; SKX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -1403,9 +1129,9 @@ ; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; SKX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; SKX-NEXT: ret float [[TMP48]] +; SKX-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; SKX-NEXT: ret float [[TMP32]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -1556,99 +1282,67 @@ ; ; AVX-LABEL: @maxf32( ; AVX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] -; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 -; AVX-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] -; AVX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef -; AVX-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 -; AVX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] -; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef -; AVX-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 -; AVX-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] -; AVX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef -; AVX-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 -; AVX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] -; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef -; AVX-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 -; AVX-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] -; AVX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef -; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 -; AVX-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] -; AVX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef -; AVX-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 -; AVX-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] -; AVX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef -; AVX-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 -; AVX-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] -; AVX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef -; AVX-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 -; AVX-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] -; AVX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef -; AVX-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 -; AVX-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] -; AVX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef -; AVX-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 -; AVX-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] -; AVX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef -; AVX-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 -; AVX-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] -; AVX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef -; AVX-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 -; AVX-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] -; AVX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef -; AVX-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 -; AVX-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] -; AVX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef -; AVX-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 -; AVX-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] -; AVX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef -; AVX-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 -; AVX-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; AVX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; AVX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; AVX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; AVX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; AVX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; AVX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; AVX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; AVX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; AVX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; AVX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef +; AVX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; AVX-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef +; AVX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef +; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef +; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef +; AVX-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef +; AVX-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef +; AVX-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef +; AVX-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef +; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef +; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef +; AVX-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef +; AVX-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef +; AVX-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef +; AVX-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef +; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef +; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef +; AVX-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef +; AVX-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef +; AVX-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef +; AVX-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef +; AVX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef +; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef +; AVX-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef +; AVX-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef +; AVX-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef +; AVX-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef +; AVX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef +; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef +; AVX-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef +; AVX-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float undef +; AVX-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -1664,105 +1358,73 @@ ; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; AVX-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef -; AVX-NEXT: ret float [[TMP96]] +; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef +; AVX-NEXT: ret float [[TMP64]] ; ; AVX2-LABEL: @maxf32( ; AVX2-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] -; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX2-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 -; AVX2-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] -; AVX2-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef -; AVX2-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 -; AVX2-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] -; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef -; AVX2-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 -; AVX2-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] -; AVX2-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef -; AVX2-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 -; AVX2-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] -; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef -; AVX2-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 -; AVX2-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] -; AVX2-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef -; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 -; AVX2-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] -; AVX2-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef -; AVX2-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 -; AVX2-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] -; AVX2-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef -; AVX2-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 -; AVX2-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] -; AVX2-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef -; AVX2-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 -; AVX2-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] -; AVX2-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef -; AVX2-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 -; AVX2-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] -; AVX2-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef -; AVX2-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 -; AVX2-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] -; AVX2-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef -; AVX2-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 -; AVX2-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] -; AVX2-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef -; AVX2-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 -; AVX2-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] -; AVX2-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef -; AVX2-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 -; AVX2-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] -; AVX2-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef -; AVX2-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 -; AVX2-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] -; AVX2-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef -; AVX2-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 -; AVX2-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; AVX2-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; AVX2-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; AVX2-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; AVX2-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; AVX2-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; AVX2-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; AVX2-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; AVX2-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; AVX2-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; AVX2-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef +; AVX2-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; AVX2-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef +; AVX2-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef +; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef +; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef +; AVX2-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef +; AVX2-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef +; AVX2-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef +; AVX2-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef +; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef +; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef +; AVX2-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef +; AVX2-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef +; AVX2-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef +; AVX2-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef +; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef +; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef +; AVX2-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef +; AVX2-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef +; AVX2-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef +; AVX2-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef +; AVX2-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef +; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef +; AVX2-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef +; AVX2-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef +; AVX2-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef +; AVX2-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef +; AVX2-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef +; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef +; AVX2-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef +; AVX2-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float undef +; AVX2-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -1778,105 +1440,73 @@ ; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; AVX2-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX2-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef -; AVX2-NEXT: ret float [[TMP96]] +; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX2-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef +; AVX2-NEXT: ret float [[TMP64]] ; ; SKX-LABEL: @maxf32( ; SKX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] -; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; SKX-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 -; SKX-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] -; SKX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef -; SKX-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 -; SKX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] -; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef -; SKX-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 -; SKX-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] -; SKX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef -; SKX-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 -; SKX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] -; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef -; SKX-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 -; SKX-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] -; SKX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef -; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 -; SKX-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] -; SKX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef -; SKX-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 -; SKX-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] -; SKX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef -; SKX-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 -; SKX-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] -; SKX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef -; SKX-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 -; SKX-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] -; SKX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef -; SKX-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 -; SKX-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] -; SKX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef -; SKX-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 -; SKX-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] -; SKX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef -; SKX-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 -; SKX-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] -; SKX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef -; SKX-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 -; SKX-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] -; SKX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef -; SKX-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 -; SKX-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] -; SKX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef -; SKX-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 -; SKX-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] -; SKX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef -; SKX-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 -; SKX-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] +; SKX-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef +; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef +; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef +; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef +; SKX-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef +; SKX-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef +; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef +; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef +; SKX-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef +; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef +; SKX-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef +; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef +; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef +; SKX-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef +; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef +; SKX-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef +; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef +; SKX-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef +; SKX-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef +; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef +; SKX-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef +; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef +; SKX-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef +; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef +; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef +; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef +; SKX-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef +; SKX-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef +; SKX-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef +; SKX-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef +; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef +; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef +; SKX-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef +; SKX-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef +; SKX-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef +; SKX-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef +; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef +; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef +; SKX-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef +; SKX-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef +; SKX-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef +; SKX-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef +; SKX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef +; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef +; SKX-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef +; SKX-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef +; SKX-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef +; SKX-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef +; SKX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef +; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef +; SKX-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef +; SKX-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float undef +; SKX-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -1892,9 +1522,9 @@ ; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; SKX-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; SKX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef -; SKX-NEXT: ret float [[TMP96]] +; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; SKX-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef +; SKX-NEXT: ret float [[TMP64]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -2027,40 +1657,38 @@ ; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] ; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] -; AVX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] -; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP10]], i32 [[TMP11]] +; AVX-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP13]], i32 [[TMP14]] +; AVX-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] -; AVX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] -; AVX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] -; AVX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] -; AVX-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 -; AVX-NEXT: store i32 [[TMP32]], i32* @var, align 8 -; AVX-NEXT: ret i32 [[TMP31]] +; AVX-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP11]] +; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP11]] +; AVX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP14]] +; AVX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP14]] +; AVX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP17]] +; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP17]] +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP18]], i32 [[TMP16]], i32 [[TMP17]] +; AVX-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP25]], [[TMP27]] +; AVX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP25]], i32 [[TMP27]] +; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; AVX-NEXT: store i32 [[TMP30]], i32* @var, align 8 +; AVX-NEXT: ret i32 [[TMP29]] ; ; AVX2-LABEL: @maxi8_mutiple_uses( ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 @@ -2068,40 +1696,38 @@ ; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] ; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] -; AVX2-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] -; AVX2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP10]], i32 [[TMP11]] +; AVX2-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP13]], i32 [[TMP14]] +; AVX2-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] -; AVX2-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] -; AVX2-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] -; AVX2-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] -; AVX2-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 -; AVX2-NEXT: store i32 [[TMP32]], i32* @var, align 8 -; AVX2-NEXT: ret i32 [[TMP31]] +; AVX2-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP11]] +; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP11]] +; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP14]] +; AVX2-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP14]] +; AVX2-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP17]] +; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP17]] +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP18]], i32 [[TMP16]], i32 [[TMP17]] +; AVX2-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP25]], [[TMP27]] +; AVX2-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP25]], i32 [[TMP27]] +; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; AVX2-NEXT: store i32 [[TMP30]], i32* @var, align 8 +; AVX2-NEXT: ret i32 [[TMP29]] ; ; SKX-LABEL: @maxi8_mutiple_uses( ; SKX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 @@ -2109,40 +1735,38 @@ ; SKX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] ; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] -; SKX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] -; SKX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef +; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef +; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; SKX-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SKX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +; SKX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP10]], i32 [[TMP11]] +; SKX-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP13]], i32 [[TMP14]] +; SKX-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SKX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] ; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] -; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] -; SKX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] -; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; SKX-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SKX-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] -; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] -; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 -; SKX-NEXT: store i32 [[TMP32]], i32* @var, align 8 -; SKX-NEXT: ret i32 [[TMP31]] +; SKX-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP11]] +; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP11]] +; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP14]] +; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP14]] +; SKX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP17]] +; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP17]] +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP18]], i32 [[TMP16]], i32 [[TMP17]] +; SKX-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP25]], [[TMP27]] +; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP25]], i32 [[TMP27]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; SKX-NEXT: store i32 [[TMP30]], i32* @var, align 8 +; SKX-NEXT: ret i32 [[TMP29]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -2207,37 +1831,33 @@ ; AVX: pp: ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP5]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP5]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; AVX-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP19]] -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP19]] -; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], [[TMP22]] -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP22]] -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP5]] -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP5]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 [[TMP22]] +; AVX-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] +; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] +; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] +; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] +; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] +; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] ; AVX-NEXT: ret i32 [[OP_EXTRA]] ; ; AVX2-LABEL: @maxi8_wrong_parent( @@ -2248,37 +1868,33 @@ ; AVX2: pp: ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP5]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP5]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef +; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef +; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef +; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef +; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef +; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef +; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef +; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; AVX2-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP19]] -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], [[TMP22]] -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP22]] -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP5]] -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP5]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 [[TMP22]] +; AVX2-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]] +; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]] +; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]] +; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]] +; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]] +; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] ; AVX2-NEXT: ret i32 [[OP_EXTRA]] ; ; SKX-LABEL: @maxi8_wrong_parent( @@ -2289,45 +1905,41 @@ ; SKX-NEXT: br label [[PP:%.*]] ; SKX: pp: ; SKX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SKX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; SKX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SKX-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SKX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP8]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP8]] -; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], [[TMP9]] -; SKX-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> undef, i1 [[TMP13]], i32 0 -; SKX-NEXT: [[TMP15:%.*]] = insertelement <2 x i1> [[TMP14]], i1 [[TMP5]], i32 1 -; SKX-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0 -; SKX-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP3]], i32 1 -; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP9]], i32 0 -; SKX-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP4]], i32 1 -; SKX-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP17]], <2 x i32> [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1 -; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP7]] -; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 undef -; SKX-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP23]], i32 undef -; SKX-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]] -; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP26]], i32 undef -; SKX-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 -; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP29]], i32 undef -; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP8]] -; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP8]] -; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], [[TMP9]] -; SKX-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0 -; SKX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], [[TMP21]] -; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP21]] -; SKX-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 [[TMP9]] +; SKX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SKX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] +; SKX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] +; SKX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] +; SKX-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> undef, i1 [[TMP12]], i32 0 +; SKX-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1 +; SKX-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i32 0 +; SKX-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1 +; SKX-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> undef, i32 [[TMP8]], i32 0 +; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1 +; SKX-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]] +; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 +; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef +; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef +; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef +; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef +; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef +; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef +; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef +; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef +; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]] +; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP7]] +; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]] +; SKX-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 +; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]] +; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP20]] +; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP8]] ; SKX-NEXT: ret i32 [[OP_EXTRA]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16